From 31b6aa851cb5daa7f8b0ba800bbf2912d1472c44 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <gregcaporaso@gmail.com>
Date: Tue, 13 May 2014 21:28:31 -0700
Subject: [PATCH] added default moltype - this should have been part of #19

---
 brokit/infernal.py | 639 ++++++++++++++++++++++-----------------------
 1 file changed, 319 insertions(+), 320 deletions(-)
diff --git a/brokit/infernal.py b/brokit/infernal.py
index 745ca99..ec8c8ce 100644
--- a/brokit/infernal.py
+++ b/brokit/infernal.py
@@ -22,7 +22,7 @@
                'RNA':'--rna',\
                 RNA:'--rna',\
                }
-               
+
 SEQ_CONSTRUCTOR_MAP = {'DNA':ChangedDnaSequence,\
                         DNA:ChangedDnaSequence,\
                        'RNA':ChangedRnaSequence,\
@@ -32,128 +32,128 @@
 class Cmalign(CommandLineApplication):
     """cmalign application controller."""
     _options = {
- 
+
     # -o <f> Save the alignment in Stockholm format to a file <f>. The default
     #   is to write it to standard output.
     '-o':ValuedParameter(Prefix='-',Name='o',Delimiter=' '),\
-    
+
     # -l Turn on the local alignment algorithm. Default is global.
     '-l':FlagParameter(Prefix='-',Name='l'),\
-    
+
     # -p Annotate the alignment with posterior probabilities calculated using
     #   the Inside and Outside algorithms.
     '-p':FlagParameter(Prefix='-',Name='p'),\
-    
+
     # -q Quiet; suppress the verbose banner, and only print the resulting
     #   alignment to stdout.
     '-q':FlagParameter(Prefix='-',Name='q'),\
-    
+
     # --informat <s> Assert that the input seqfile is in format <s>. Do not run
     #   Babelfish format autodection. Acceptable formats are: FASTA, EMBL,
     #   UNIPROT, GENBANK, and DDBJ. <s> is case-insensitive.
     '--informat':ValuedParameter(Prefix='--',Name='informat',Delimiter=' '),\
-    
+
     # --mpi Run as an MPI parallel program.  (see User's Guide for details).
     '--mpi':FlagParameter(Prefix='--',Name='mpi'),\
-    
-    # Expert Options 
-    
-    # --optacc Align sequences using the Durbin/Holmes optimal accuracy 
-    #   algorithm. This is default behavior, so this option is probably useless. 
+
+    # Expert Options
+
+    # --optacc Align sequences using the Durbin/Holmes optimal accuracy
+    #   algorithm. This is default behavior, so this option is probably useless.
     '--optacc':FlagParameter(Prefix='--',Name='optacc'),\
-    
-    # --cyk Do not use the Durbin/Holmes optimal accuracy alignment to align the 
+
+    # --cyk Do not use the Durbin/Holmes optimal accuracy alignment to align the
     #   sequences, instead use the CYK algorithm which determines the optimally
-    #   scoring alignment of the sequence to the model. 
+    #   scoring alignment of the sequence to the model.
     '--cyk':FlagParameter(Prefix='--',Name='cyk'),\
-    
+
     # --sample Sample an alignment from the posterior distribution of
     #   alignments.
     '--sample':FlagParameter(Prefix='--',Name='sample'),\
-    
-    # -s <n> Set the random number generator seed to <n>, where <n> is a 
-    #   positive integer. This option can only be used in combination with 
+
+    # -s <n> Set the random number generator seed to <n>, where <n> is a
+    #   positive integer. This option can only be used in combination with
     #   --sample. The default is to use time() to generate a different seed for
     #   each run, which means that two different runs of cmalign --sample on the
     #   same alignment will give slightly different results. You can use this
     #   option to generate reproducible results.
     '-s':ValuedParameter(Prefix='-',Name='s',Delimiter=' '),\
-    
+
     # --viterbi Do not use the CM to align the sequences, instead use the HMM
     #   Viterbi algorithm to align with a CM Plan 9 HMM.
     '--viterbi':FlagParameter(Prefix='--',Name='viterbi'),\
-    
+
     # --sub Turn on the sub model construction and alignment procedure.
     '--sub':FlagParameter(Prefix='--',Name='sub'),\
-    
-    # --small Use the divide and conquer CYK alignment algorithm described in 
+
+    # --small Use the divide and conquer CYK alignment algorithm described in
     #   SR Eddy, BMC Bioinformatics 3:18, 2002.
     '--small':FlagParameter(Prefix='--',Name='small'),\
-    
+
     # --hbanded This option is turned on by default. Accelerate alignment by
     #   pruning away regions of the CM DP matrix that are deemed negligible by
     #   an HMM.
     '--hbanded':FlagParameter(Prefix='--',Name='hbanded'),\
-    
+
     # --nonbanded Turns off HMM banding.
     '--nonbanded':FlagParameter(Prefix='--',Name='nonbanded'),\
-    
+
     # --tau <x> Set the tail loss probability used during HMM band calculation
     #   to <x>.
     '--tau':ValuedParameter(Prefix='--',Name='tau',Delimiter=' '),\
-    
+
     # --mxsize <x> Set the maximum allowable DP matrix size to <x> megabytes.
     '--mxsize':ValuedParameter(Prefix='--',Name='mxsize',Delimiter=' '),\
 
     # --rna Output the alignments as RNA sequence alignments. This is true by
     #   default.
     '--rna':FlagParameter(Prefix='--',Name='rna'),\
-    
+
     # --dna Output the alignments as DNA sequence alignments.
     '--dna':FlagParameter(Prefix='--',Name='dna'),\
-    
+
     # --matchonly Only include match columns in the output alignment, do not
     #   include any insertions relative to the consensus model.
     '--matchonly':FlagParameter(Prefix='--',Name='matchonly'),\
-    
+
     # --resonly Only include match columns in the output alignment that have at
     #   least 1 residue (non-gap character) in them.
     '--resonly':FlagParameter(Prefix='--',Name='resonly'),\
-    
-    # --fins Change the behavior of how insert emissions are placed in the 
+
+    # --fins Change the behavior of how insert emissions are placed in the
     #   alignment.
     '--fins':FlagParameter(Prefix='--',Name='fins'),\
-    
+
     # --onepost Modifies behavior of the -p option. Use only one character
     #   instead of two to annotate the posterior probability of each aligned
     #   residue.
     '--onepost':FlagParameter(Prefix='--',Name='onepost'),\
-    
+
     # --withali <f> Reads an alignment from file <f> and aligns it as a single
     #   object to the CM; e.g. the alignment in <f> is held fixed.
     '--withali':ValuedParameter(Prefix='--',Name='withali',Delimiter=' '),\
-    
+
     # --withpknots Must be used in combination with --withali <f>. Propogate
     #   structural information for any pseudoknots that exist in <f> to the
     #   output alignment.
     '--withpknots':FlagParameter(Prefix='--',Name='withpknots'),\
-    
+
     # --rf Must be used in combination with --withali <f>. Specify that the
     #   alignment in <f> has the same "#=GC RF" annotation as the alignment file
-    #   the CM was built from using cmbuild and further that the --rf option was 
+    #   the CM was built from using cmbuild and further that the --rf option was
     #   supplied to cmbuild when the CM was constructed.
     '--rf':FlagParameter(Prefix='--',Name='rf'),\
-    
+
     # --gapthresh <x> Must be used in combination with --withali <f>. Specify
     #   that the --gapthresh <x> option was supplied to cmbuild when the CM was
     #   constructed from the alignment file <f>.
     '--gapthresh':ValuedParameter(Prefix='--',Name='gapthresh',Delimiter=' '),\
-    
+
     # --tfile <f> Dump tabular sequence tracebacks for each individual sequence
     #   to a file <f>. Primarily useful for debugging.
     '--tfile':ValuedParameter(Prefix='--',Name='tfile',Delimiter=' '),\
-    
-    
+
+
     }
     _parameters = {}
     _parameters.update(_options)
@@ -162,20 +162,20 @@ class Cmalign(CommandLineApplication):
 
     def getHelp(self):
         """Method that points to the Infernal documentation."""
-        
+
         help_str = \
         """
         See Infernal documentation at:
         http://infernal.janelia.org/
         """
         return help_str
-    
+
     def _tempfile_as_multiline_string(self, data):
         """Write a multiline string to a temp file and return the filename.
 
             data: a multiline string to be written to a file.
 
-           * Note: the result will be the filename as a FilePath object 
+           * Note: the result will be the filename as a FilePath object
             (which is a string subclass).
 
         """
@@ -186,7 +186,7 @@ def _tempfile_as_multiline_string(self, data):
         return filename
 
     def _alignment_out_filename(self):
-        
+
         if self.Parameters['-o'].isOn():
             refined_filename = self._absolute(str(\
                 self.Parameters['-o'].Value))
@@ -199,191 +199,191 @@ def _get_result_paths(self,data):
         if self.Parameters['-o'].isOn():
             out_name = self._alignment_out_filename()
             result['Alignment'] = ResultPath(Path=out_name,IsWritten=True)
-        
+
         return result
 
 class Cmbuild(CommandLineApplication):
     """cmbuild application controller."""
     _options = {
-    
+
     # -n <s> Name the covariance model <s>. (Does not work if alifile contains
     #   more than one alignment).
     '-n':ValuedParameter(Prefix='-',Name='n',Delimiter=' '),\
-    
+
     # -A Append the CM to cmfile, if cmfile already exists.
     '-A':FlagParameter(Prefix='-',Name='A'),\
-    
+
     # -F Allow cmfile to be overwritten. Normally, if cmfile already exists,
     #   cmbuild exits with an error unless the -A or -F option is set.
     '-F':FlagParameter(Prefix='-',Name='F'),\
-    
+
     # -v Run in verbose output mode instead of using the default single line
     #   tabular format. This output format is similar to that used by older
     #   versions of Infernal.
     '-v':FlagParameter(Prefix='-',Name='v'),\
-    
+
     # --iins Allow informative insert emissions for the CM. By default, all CM
     #   insert emission scores are set to 0.0 bits.
     '--iins':FlagParameter(Prefix='--',Name='iins'),\
-    
+
     # --Wbeta<x> Set the beta tail loss probability for query-dependent banding
     #   (QDB) to <x> The QDB algorithm is used to determine the maximium length
     #   of a hit to the model. For more information on QDB see (Nawrocki and
     #   Eddy, PLoS Computational Biology 3(3): e56).
     '--Wbeta':ValuedParameter(Prefix='--',Name='Wbeta',Delimiter=' '),\
-    
+
     # Expert Options
-    
+
     # --rsearch <f> Parameterize emission scores a la RSEARCH, using the
-    #   RIBOSUM matrix in file <f>. For more information see the RSEARCH 
+    #   RIBOSUM matrix in file <f>. For more information see the RSEARCH
     #   publication (Klein and Eddy, BMC Bioinformatics 4:44, 2003). Actually,
     #   the emission scores will not exactly With --rsearch enabled, all
     #   alignments in alifile must contain exactly one sequence or the --call
     #   option must also be enabled.
     '--rsearch':ValuedParameter(Prefix='--',Name='rsearch',Delimiter=' '),\
-     
+
     # --binary Save the model in a compact binary format. The default is a more
     #   readable ASCII text format.
     '--binary':FlagParameter(Prefix='--',Name='binary'),\
-    
+
     # --rf Use reference coordinate annotation (#=GC RF line, in Stockholm) to
     #   determine which columns are consensus, and which are inserts.
     '--rf':FlagParameter(Prefix='--',Name='rf'),\
-    
+
     # --gapthresh <x> Set the gap threshold (used for determining which columns
     #   are insertions versus consensus; see --rf above) to <x>. The default is
     #   0.5.
     '--gapthresh':ValuedParameter(Prefix='--',Name='gapthresh',Delimiter=' '),\
-    
+
     # --ignorant Strip all base pair secondary structure information from all
     #   input alignments in alifile before building the CM(s).
     '--ignorant':FlagParameter(Prefix='--',Name='ignorant'),\
-    
+
     # --wgsc Use the Gerstein/Sonnhammer/Chothia (GSC) weighting algorithm.
     #   This is the default unless the number of sequences in the alignment
     #   exceeds a cutoff (see --pbswitch), in which case the default becomes
     #   the faster Henikoff position-based weighting scheme.
     '--wgsc':FlagParameter(Prefix='--',Name='wgsc'),\
-    
+
     # --wblosum Use the BLOSUM filtering algorithm to weight the sequences,
     #   instead of the default GSC weighting.
     '--wblosum':FlagParameter(Prefix='--',Name='wblosum'),\
-    
+
     # --wpb Use the Henikoff position-based weighting scheme. This weighting
     #   scheme is automatically used (overriding --wgsc and --wblosum) if the
     #   number of sequences in the alignment exceeds a cutoff (see --pbswitch).
     '--wpb':FlagParameter(Prefix='--',Name='wpb'),\
-    
+
     # --wnone Turn sequence weighting off; e.g. explicitly set all sequence
     #   weights to 1.0.
     '--wnone':FlagParameter(Prefix='--',Name='wnone'),\
-    
+
     # --wgiven Use sequence weights as given in annotation in the input
     #   alignment file. If no weights were given, assume they are all 1.0.
     #   The default is to determine new sequence weights by the Gerstein/
     #   Sonnhammer/Chothia algorithm, ignoring any annotated weights.
     '--wgiven':FlagParameter(Prefix='--',Name='wgiven'),\
-    
+
     # --pbswitch <n> Set the cutoff for automatically switching the weighting
     #   method to the Henikoff position-based weighting scheme to <n>. If the
     #   number of sequences in the alignment exceeds <n> Henikoff weighting is
     #   used. By default <n> is 5000.
     '--pbswitch':ValuedParameter(Prefix='--',Name='pbswitch',Delimiter=' '),\
-    
+
     # --wid <x> Controls the behavior of the --wblosum weighting option by
     #   setting the percent identity for clustering the alignment to <x>.
     '--wid':ValuedParameter(Prefix='--',Name='wid',Delimiter=' '),\
-    
+
     # --eent Use the entropy weighting strategy to determine the effective
     #   sequence number that gives a target mean match state relative entropy.
     '--wgiven':FlagParameter(Prefix='--',Name='wgiven'),\
-    
+
     # --enone Turn off the entropy weighting strategy. The effective sequence
     #   number is just the number of sequences in the alignment.
     '--wgiven':FlagParameter(Prefix='--',Name='wgiven'),\
-    
+
     # --ere <x> Set the target mean match state entropy as <x>. By default the
     #   target entropy 1.46 bits.
     '--ere':ValuedParameter(Prefix='--',Name='ere',Delimiter=' '),\
-    
+
     # --null <f> Read a null model from <f>. The null model defines the
     #   probability of each RNA nucleotide in background sequence, the default
     #   is to use 0.25 for each nucleotide.
     '--null':ValuedParameter(Prefix='--',Name='null',Delimiter=' '),\
-    
-    # --prior <f> Read a Dirichlet prior from <f>, replacing the default mixture 
+
+    # --prior <f> Read a Dirichlet prior from <f>, replacing the default mixture
     #   Dirichlet.
     '--prior':ValuedParameter(Prefix='--',Name='prior',Delimiter=' '),\
-    
+
     # --ctarget <n> Cluster each alignment in alifile by percent identity.
     #   find a cutoff percent id threshold that gives exactly <n> clusters and
-    #   build a separate CM from each cluster. If <n> is greater than the number 
+    #   build a separate CM from each cluster. If <n> is greater than the number
     #   of sequences in the alignment the program will not complain, and each
     #   sequence in the alignment will be its own cluster. Each CM will have a
     #   positive integer appended to its name indicating the order in which it
     #   was built.
     '--ctarget':ValuedParameter(Prefix='--',Name='ctarget',Delimiter=' '),\
-    
+
     # --cmaxid <x> Cluster each sequence alignment in alifile by percent
     #   identity. Define clusters at the cutoff fractional id similarity of <x>
     #   and build a separate CM from each cluster.
     '--cmaxid':ValuedParameter(Prefix='--',Name='cmaxid',Delimiter=' '),\
-    
+
     # --call Build a separate CM from each sequence in each alignment in
     #   alifile. Naming of CMs takes place as described above for --ctarget.
     '--call':FlagParameter(Prefix='--',Name='call'),\
-    
+
     # --corig After building multiple CMs using --ctarget, --cmindiff or --call
     #   as described above, build a final CM using the complete original
     #   alignment from alifile.
     '--corig':FlagParameter(Prefix='--',Name='corig'),\
-    
+
     # --cdump<f> Dump the multiple alignments of each cluster to <f> in
     #   Stockholm format. This option only works in combination with --ctarget,
     #   --cmindiff or --call.
     '--cdump':ValuedParameter(Prefix='--',Name='cdump',Delimiter=' '),\
-    
+
     # --refine <f> Attempt to refine the alignment before building the CM using
     #   expectation-maximization (EM). The final alignment (the alignment used
     #   to build the CM that gets written to cmfile) is written to <f>.
     '--refine':ValuedParameter(Prefix='--',Name='refine',Delimiter=' '),\
-    
+
     # --gibbs Modifies the behavior of --refine so Gibbs sampling is used
     #   instead of EM.
     '--gibbs':FlagParameter(Prefix='--',Name='gibbs'),\
-    
+
     # -s <n> Set the random seed to <n>, where <n> is a positive integer.
-    #   This option can only be used in combination with --gibbs. The default is 
+    #   This option can only be used in combination with --gibbs. The default is
     #   to use time() to generate a different seed for each run, which means
     #   that two different runs of cmbuild --refine <f> --gibbs on the same
     #   alignment will give slightly different results. You can use this option
     #   to generate reproducible results.
     '-s':ValuedParameter(Prefix='-',Name='s',Delimiter=' '),\
-    
+
     # -l With --refine, turn on the local alignment algorithm, which allows the
     #   alignment to span two or more subsequences if necessary (e.g. if the
     #   structures of the query model and target sequence are only partially
     #   shared), allowing certain large insertions and deletions in the
-    #   structure to be penalized differently than normal indels. The default is 
+    #   structure to be penalized differently than normal indels. The default is
     #   to globally align the query model to the target sequences.
     '-l':ValuedParameter(Prefix='-',Name='l',Delimiter=' '),\
-    
+
     # -a With --refine, print the scores of each individual sequence alignment.
     '-a':ValuedParameter(Prefix='-',Name='a',Delimiter=' '),\
-    
+
     # --cyk With --refine, align with the CYK algorithm.
     '--cyk':FlagParameter(Prefix='--',Name='cyk'),\
-    
+
     # --sub With --refine, turn on the sub model construction and alignment
     #   procedure.
     '--sub':FlagParameter(Prefix='--',Name='sub'),\
-    
+
     # --nonbanded With --refine, do not use HMM bands to accelerate alignment.
     #   Use the full CYK algorithm which is guaranteed to give the optimal
     #   alignment. This will slow down the run significantly, especially for
     #   large models.
     '--nonbanded':FlagParameter(Prefix='--',Name='nonbanded'),\
-    
+
     # --tau <x> With --refine, set the tail loss probability used during HMM
     #   band calculation to <f>. This is the amount of probability mass within
     #   the HMM posterior probabilities that is considered negligible. The
@@ -391,60 +391,60 @@ class Cmbuild(CommandLineApplication):
     #   acceleration, but increase the chance of missing the optimal alignment
     #   due to the HMM bands.
     '--tau':ValuedParameter(Prefix='--',Name='tau',Delimiter=' '),\
-    
+
     # --fins With --refine, change the behavior of how insert emissions are
     #   placed in the alignment.
     '--fins':FlagParameter(Prefix='--',Name='fins'),\
-    
+
     # --mxsize <x> With --refine, set the maximum allowable matrix size for
     #   alignment to <x> megabytes.
     '--mxsize':ValuedParameter(Prefix='--',Name='mxsize',Delimiter=' '),\
-    
+
     # --rdump<x> With --refine, output the intermediate alignments at each
     #   iteration of the refinement procedure (as described above for --refine )
     #   to file <f>.
     '--rdump':ValuedParameter(Prefix='--',Name='rdump',Delimiter=' '),\
-    
+
     }
     _parameters = {}
     _parameters.update(_options)
     _command = "cmbuild"
     _suppress_stderr=True
-    
+
     def getHelp(self):
         """Method that points to the Infernal documentation."""
-        
+
         help_str = \
         """
         See Infernal documentation at:
         http://infernal.janelia.org/
         """
         return help_str
-    
+
     def _refine_out_filename(self):
-        
+
         if self.Parameters['--refine'].isOn():
             refined_filename = self._absolute(str(\
                 self.Parameters['--refine'].Value))
         else:
             raise ValueError, 'No refine output file specified.'
         return refined_filename
-    
+
     def _cm_out_filename(self):
-        
+
         if self.Parameters['-n'].isOn():
             refined_filename = self._absolute(str(\
                 self.Parameters['-n'].Value))
         else:
             raise ValueError, 'No cm output file specified.'
         return refined_filename
-    
+
     def _tempfile_as_multiline_string(self, data):
         """Write a multiline string to a temp file and return the filename.
 
             data: a multiline string to be written to a file.
 
-           * Note: the result will be the filename as a FilePath object 
+           * Note: the result will be the filename as a FilePath object
             (which is a string subclass).
 
         """
@@ -453,7 +453,7 @@ def _tempfile_as_multiline_string(self, data):
         data_file.write(data)
         data_file.close()
         return filename
-    
+
     def _get_result_paths(self,data):
         result = {}
         if self.Parameters['--refine'].isOn():
@@ -462,161 +462,161 @@ def _get_result_paths(self,data):
         if self.Parameters['-n'].isOn():
             cm_name = self._cm_out_filename()
             result['CmFile'] = ResultPath(Path=cm_name,IsWritten=True)
-        
+
         return result
 
-    
+
 class Cmcalibrate(CommandLineApplication):
     """cmcalibrate application controller."""
     _options = {
-    
+
     # -s <n> Set the random number generator seed to <n>, where <n> is a
     #   positive integer. The default is to use time() to generate a different
-    #   seed for each run, which means that two different runs of cmcalibrate on 
+    #   seed for each run, which means that two different runs of cmcalibrate on
     #   the same CM will give slightly different E-value and HMM filter
     #   threshold parameters. You can use this option to generate reproducible
     #   results.
     '-s':ValuedParameter(Prefix='-',Name='s',Delimiter=' '),\
-    
+
     # --forecast <n> Predict the running time of the calibration for cmfile and
     #   provided options and exit, DO NOT perform the calibration.
     '--forecast':ValuedParameter(Prefix='--',Name='forecast',Delimiter=' '),\
-    
+
     # --mpi Run as an MPI parallel program.
     '--mpi':FlagParameter(Prefix='--',Name='mpi'),\
-    
+
     # Expert Options
-    
+
     # --exp-cmL-glc <x> Set the length of random sequence to search for the CM
     #   glocal exponential tail fits to <x> megabases (Mb).
     '--exp-cmL-glc':ValuedParameter(Prefix='--',Name='exp-cmL-glc',\
         Delimiter=' '),\
-    
+
     # --exp-cmL-loc <x> Set the length of random sequence to search for the CM
     #   local exponential tail fits to <x> megabases (Mb).
     '--exp-cmL-loc':ValuedParameter(Prefix='--',Name='exp-cmL-loc',\
         Delimiter=' '),\
-    
+
     # --exp-hmmLn-glc <x> Set the minimum random sequence length to search for
     #   the HMM glocal exponential tail fits to <x> megabases (Mb).
     '--exp-hmmLn-glc':ValuedParameter(Prefix='--',Name='exp-hmmLn-glc',\
         Delimiter=' '),\
-    
+
     # --exp-hmmLn-loc <x> Set the minimum random sequence length to search for
     #   the HMM local exponential tail fits to <x> megabases (Mb).
     '--exp-hmmLn-loc':ValuedParameter(Prefix='--',Name='exp-hmmLn-loc',\
         Delimiter=' '),\
-    
+
     # --exp-hmmLx <x> Set the maximum random sequence length to search when
     #   determining HMM E-values to <x> megabases (Mb).
     '--exp-hmmLx':ValuedParameter(Prefix='--',Name='exp-hmmLx',Delimiter=' '),\
-    
+
     # --exp-fract <x> Set the HMM/CM fraction of dynamic programming
     #   calculations to <x>.
     '--exp-fract':ValuedParameter(Prefix='--',Name='exp-fract',Delimiter=' '),\
-    
+
     # --exp-tailn-cglc <x> During E-value calibration of glocal CM search modes
     #   fit the exponential tail to the high scores in the histogram tail that
     #   includes <x> hits per Mb searched.
     '--exp-tailn-cglc':ValuedParameter(Prefix='--',Name='exp-tailn-cglc',\
         Delimiter=' '),\
-    
+
     # --exp-tailn-cloc <x> During E-value calibration of local CM search modes
     #   fit the exponential tail to the high scores in the histogram tail that
     #   includes <x> hits per Mb searched.
     '--exp-tailn-cloc':ValuedParameter(Prefix='--',Name='exp-tailn-cloc',\
         Delimiter=' '),\
-    
+
     # --exp-tailn-hglc <x> During E-value calibration of glocal HMM search modes
     #   fit the exponential tail to the high scores in the histogram tail that
     #   includes <x> hits per Mb searched.
     '--exp-tailn-hglc':ValuedParameter(Prefix='--',Name='exp-tailn-hglc',\
         Delimiter=' '),\
-    
+
     # --exp-tailn-hloc <x> During E-value calibration of local HMM search modes
     #   fit the exponential tail to the high scores in the histogram tail that
     #   includes <x> hits per Mb searched.
     '--exp-tailn-hloc':ValuedParameter(Prefix='--',Name='exp-tailn-hloc',\
         Delimiter=' '),\
-    
+
     # --exp-tailp <x> Ignore the --exp-tailn prefixed options and fit the <x>
     #   fraction right tail of the histogram to exponential tails, for all
     #   search modes.
     '--exp-tailp':ValuedParameter(Prefix='--',Name='exp-tailp',Delimiter=' '),\
-    
+
     # --exp-tailxn <n> With --exp-tailp enforce that the maximum number of hits
     #   in the tail that is fit is <n>.
     '--exp-tailxn':ValuedParameter(Prefix='--',Name='exp-tailxn',\
         Delimiter=' '),\
-    
+
     # --exp-beta <x> During E-value calibration, by default query-dependent
     #   banding (QDB) is used to accelerate the CM search algorithms with a beta
     #   tail loss probability of 1E-15.
     '--exp-beta':ValuedParameter(Prefix='--',Name='exp-beta',Delimiter=' '),\
-    
+
     # --exp-no-qdb Turn of QDB during E-value calibration. This will slow down
     #   calibration, and is not recommended unless you plan on using --no-qdb in
     #   cmsearch.
     '--exp-no-qdb':FlagParameter(Prefix='--',Name='exp-no-qdb'),\
-    
+
     # --exp-hfile <f> Save the histograms fit for the E-value calibration to
     #   file <f>. The format of this file is two tab delimited columns.
     '--exp-hfile':ValuedParameter(Prefix='--',Name='exp-hfile',Delimiter=' '),\
-    
+
     # --exp-sfile <f> Save a survival plot for the E-value calibration to file
     #   <f>. The format of this file is two tab delimited columns.
     '--exp-sfile':ValuedParameter(Prefix='--',Name='exp-sfile',Delimiter=' '),\
-    
+
     # --exp-qqfile <f> Save a quantile-quantile plot for the E-value calibration
     #   to file <f>. The format of this file is two tab delimited columns.
     '--exp-qqfile':ValuedParameter(Prefix='--',Name='exp-qqfile',\
         Delimiter=' '),\
-    
+
     # --exp-ffile <f> Save statistics on the exponential tail statistics to file
     #   <f>. The file will contain the lambda and mu values for exponential
     #   tails fit to tails of different sizes.
     '--exp-ffile':ValuedParameter(Prefix='--',Name='exp-ffile',Delimiter=' '),\
-    
+
     # --fil-N <n> Set the number of sequences sampled and searched for the HMM
     #   filter threshold calibration to <n>. By default, <n> is 10,000.
     '--fil-N':ValuedParameter(Prefix='--',Name='fil-N',Delimiter=' '),\
-    
+
     # --fil-F <x> Set the fraction of sample sequences the HMM filter must be
     #   able to recognize, and allow to survive, to <x>, where <x> is a positive
     #   real number less than or equal to 1.0. By default, <x> is 0.995.
     '--fil-F':ValuedParameter(Prefix='--',Name='fil-F',Delimiter=' '),\
-    
+
     # --fil-xhmm <x> Set the target number of dynamic programming calculations
     #   for a HMM filtered CM QDB search with beta = 1E-7 to <x> times the
     #   number of calculations required to do an HMM search. By default, <x> is
     #   2.0.
     '--fil-xhmm':ValuedParameter(Prefix='--',Name='fil-xhmm',Delimiter=' '),\
-    
+
     # --fil-tau <x> Set the tail loss probability during HMM band calculation
     #   for HMM filter threshold calibration to <x>.
     '--fil-tau':ValuedParameter(Prefix='--',Name='fil-tau',Delimiter=' '),\
-    
+
     # --fil-gemit During HMM filter calibration, always sample sequences from a
     #   globally configured CM, even when calibrating local modes.
     '--fil-gemit':FlagParameter(Prefix='--',Name='fil-gemit'),\
-    
+
     # --fil-dfile <f> Save statistics on filter threshold calibration, including
     #   HMM and CM scores for all sampled sequences, to file <f>.
     '--fil-dfile':ValuedParameter(Prefix='--',Name='fil-dfile',Delimiter=' '),\
-    
+
     # --mxsize <x> Set the maximum allowable DP matrix size to <x> megabytes.
     '--mxsize':ValuedParameter(Prefix='--',Name='mxsize',Delimiter=' '),\
 
     }
-    
+
     _parameters = {}
     _parameters.update(_options)
     _command = "cmcalibrate"
     _suppress_stderr=True
-    
+
     def getHelp(self):
         """Method that points to the Infernal documentation."""
-        
+
         help_str = \
         """
         See Infernal documentation at:
@@ -627,90 +627,90 @@ def getHelp(self):
 class Cmemit(CommandLineApplication):
     """cmemit application controller."""
     _options = {
-    
+
     # -o <f> Save the synthetic sequences to file <f> rather than writing them
     #   to stdout.
     '-o':ValuedParameter(Prefix='-',Name='o',Delimiter=' '),\
-    
+
     # -n <n> Generate <n> sequences. Default is 10.
     '-n':ValuedParameter(Prefix='-',Name='n',Delimiter=' '),\
-    
+
     # -u Write the generated sequences in unaligned format (FASTA). This is the
     # default, so this option is probably useless.
     '-u':FlagParameter(Prefix='-',Name='u'),\
-    
+
     # -a Write the generated sequences in an aligned format (STOCKHOLM) with
     #   consensus structure annotation rather than FASTA.
     '-a':FlagParameter(Prefix='-',Name='a'),\
-    
+
     # -c Predict a single majority-rule consensus sequence instead of sampling
     #   sequences from the CM's probability distribution.
     '-c':FlagParameter(Prefix='-',Name='c'),\
-    
+
     # -l Configure the CMs into local mode before emitting sequences. See the
     #   User's Guide for more information on locally configured CMs.
     '-l':FlagParameter(Prefix='-',Name='l'),\
-    
+
     # -s <n> Set the random seed to <n>, where <n> is a positive integer. The
     #   default is to use time() to generate a different seed for each run,
     #   which means that two different runs of cmemit on the same CM will give
     #   different results. You can use this option to generate reproducible
     #   results.
     '-s':ValuedParameter(Prefix='-',Name='s',Delimiter=' '),\
-    
+
     # --rna Specify that the emitted sequences be output as RNA sequences. This
     #   is true by default.
     '--rna':FlagParameter(Prefix='--',Name='rna'),\
-    
+
     # --dna Specify that the emitted sequences be output as DNA sequences. By
     #   default, the output alphabet is RNA.
     '--dna':FlagParameter(Prefix='--',Name='dna'),\
-    
+
     # --tfile <f> Dump tabular sequence parsetrees (tracebacks) for each emitted
     #   sequence to file <f>. Primarily useful for debugging.
     '--tfile':ValuedParameter(Prefix='--',Name='tfile',Delimiter=' '),\
-    
+
     # --exp <x> Exponentiate the emission and transition probabilities of the CM
     #   by <x> and then renormalize those distributions before emitting
     #   sequences.
     '--exp':ValuedParameter(Prefix='--',Name='exp',Delimiter=' '),\
-    
+
     # --begin <n> Truncate the resulting alignment by removing all residues
     #   before consensus column <n>, where <n> is a positive integer no greater
     #   than the consensus length of the CM. Must be used in combination with
     #   --end and either -a or --shmm (a developer option).
     '--begin':ValuedParameter(Prefix='--',Name='begin',Delimiter=' '),\
-    
+
     # --end <n> Truncate the resulting alignment by removing all residues after
     #   consensus column <n>, where <n> is a positive integer no greater than
     #   the consensus length of the CM. Must be used in combination with --begin
     #   and either -a or --shmm (a developer option).
     '--end':ValuedParameter(Prefix='--',Name='end',Delimiter=' '),\
-    
+
     }
     _parameters = {}
     _parameters.update(_options)
     _command = "cmemit"
     _suppress_stderr=True
-    
+
     def getHelp(self):
         """Method that points to the Infernal documentation."""
-        
+
         help_str = \
         """
         See Infernal documentation at:
         http://infernal.janelia.org/
         """
         return help_str
-    
+
 class Cmscore(CommandLineApplication):
     """cmscore application controller."""
     _options = {
-    
+
     # -n <n> Set the number of sequences to generate and align to <n>. This
     #   option is incompatible with the --infile option.
     '-n':ValuedParameter(Prefix='-',Name='n',Delimiter=' '),\
-    
+
     # -l Turn on the local alignment algorithm, which allows the alignment to
     #   span two or more subsequences if necessary (e.g. if the structures of
     #   the query model and target sequence are only partially shared), allowing
@@ -718,7 +718,7 @@ class Cmscore(CommandLineApplication):
     #   differently than normal indels. The default is to globally align the
     #   query model to the target sequences.
     '-l':FlagParameter(Prefix='-',Name='l'),\
-    
+
     # -s <n> Set the random seed to <n>, where <n> is a positive integer. The
     #   default is to use time() to generate a different seed for each run,
     #   which means that two different runs of cmscore on the same CM will give
@@ -727,63 +727,63 @@ class Cmscore(CommandLineApplication):
     #   score, so -s is incompatible with the --infile option which supplies
     #   the sequences to score in an input file.
     '-s':ValuedParameter(Prefix='-',Name='s',Delimiter=' '),\
-    
+
     # -a Print individual timings and score comparisons for each sequence in
     #   seqfile. By default only summary statistics are printed.
     '-a':FlagParameter(Prefix='-',Name='a'),\
-    
+
     # --sub Turn on the sub model construction and alignment procedure.
     '--sub':FlagParameter(Prefix='--',Name='sub'),\
-    
+
     # --mxsize <x> Set the maximum allowable DP matrix size to <x> megabytes.
     '--mxsize':ValuedParameter(Prefix='--',Name='mxsize',Delimiter=' '),\
-    
+
     # --mpi Run as an MPI parallel program.
     '--mpi':FlagParameter(Prefix='--',Name='mpi'),\
-    
+
     # Expert Options
-    
+
     # --emit Generate sequences to score by sampling from the CM.
     '--emit':FlagParameter(Prefix='--',Name='emit'),\
-    
+
     # --random Generate sequences to score by sampling from the CMs null
     #   distribution. This option turns the --emit option off.
     '--random':FlagParameter(Prefix='--',Name='random'),\
-    
+
     # --infile <f> Sequences to score are read from the file <f>. All the
     #   sequences from <f> are read and scored, the -n and -s options are
     #   incompatible with --infile.
     '--infile':ValuedParameter(Prefix='--',Name='infile',Delimiter=' '),\
-    
+
     # --outfile <f> Save generated sequences that are scored to the file <f> in
     #   FASTA format. This option is incompatible with the --infile option.
     '--outfile':ValuedParameter(Prefix='--',Name='outfile',Delimiter=' '),\
-    
+
     # --Lmin <n1> Must be used in combination with --random and --Lmax <n2>.
     '--Lmin':ValuedParameter(Prefix='--',Name='Lmin',Delimiter=' '),\
-    
+
     # --pad Must be used in combination with --emit and --search. Add <n> cm->W
     #   (max hit length) minus L (sequence <x> length) residues to the 5' and 3'
     #   end of each emitted sequence <x>.
     '--pad':FlagParameter(Prefix='--',Name='pad'),\
-    
+
     # --hbanded Specify that the second stage alignment algorithm be HMM banded
     #   CYK. This option is on by default.
     '--hbanded':FlagParameter(Prefix='--',Name='hbanded'),\
-    
+
     # --tau <x> For stage 2 alignment, set the tail loss probability used during
     #   HMM band calculation to <x>.
     '--tau':ValuedParameter(Prefix='--',Name='tau',Delimiter=' '),\
-    
+
     # --aln2bands With --search, when calculating HMM bands, use an HMM
     #   alignment algorithm instead of an HMM search algorithm.
     '--aln2bands':FlagParameter(Prefix='--',Name='aln2bands'),\
-    
+
     # --hsafe For stage 2 HMM banded alignment, realign any sequences with a
     #   negative alignment score using non-banded CYK to guarantee finding the
     #   optimal alignment.
     '--hsafe':FlagParameter(Prefix='--',Name='hsafe'),\
-    
+
     # --nonbanded Specify that the second stage alignment algorithm be standard,
     #   non-banded, non-D&C CYK. When --nonbanded is enabled, the program fails
     #   with a non-zero exit code and prints an error message if the parsetree
@@ -793,39 +793,39 @@ class Cmscore(CommandLineApplication):
     #   larger RNAs (more than 300 residues) if memory is limiting, --nonbanded
     #   should be used in combination with --scoreonly.
     '--nonbanded':FlagParameter(Prefix='--',Name='nonbanded'),\
-    
+
     # --scoreonly With --nonbanded during the second stage standard non-banded
     #   CYK alignment, use the "score only" variant of the algorithm to save
     #   memory, and don't recover a parse tree.
     '--scoreonly':FlagParameter(Prefix='--',Name='scoreonly'),\
-    
+
     # --viterbi Specify that the second stage alignment algorithm be Viterbi to
     #   a CM Plan 9 HMM.
     '--viterbi':FlagParameter(Prefix='--',Name='viterbi'),\
-    
+
     # --search Run all algorithms in scanning mode, not alignment mode.
     '--search':FlagParameter(Prefix='--',Name='search'),\
-    
+
     # --inside With --search Compare the non-banded scanning Inside algorithm to
     #   the HMM banded scanning Inside algorith, instead of using CYK versions.
     '--inside':FlagParameter(Prefix='--',Name='inside'),\
-    
+
     # --forward With --search Compare the scanning Forward scoring algorithm
     #   against CYK.
     '--forward':FlagParameter(Prefix='--',Name='forward'),\
-    
+
     # --taus <n> Specify the first alignment algorithm as non-banded D&C CYK,
     #   and multiple stages of HMM banded CYK alignment. The first HMM banded
     #   alignment will use tau=1E-<x>, which will be the highest value of tau
     #   used. Must be used in combination with --taue.
     '--taus':ValuedParameter(Prefix='--',Name='taus',Delimiter=' '),\
-    
+
     # --taue <n> Specify the first alignment algorithm as non-banded D&C CYK,
     #   and multiple stages of HMM banded CYK alignment. The final HMM banded
     #   alignment will use tau=1E-<x>, which will be the lowest value of tau
     #   used. Must be used in combination with --taus.
     '--taue':ValuedParameter(Prefix='--',Name='taue',Delimiter=' '),\
-    
+
     # --tfile <f> Print the parsetrees for each alignment of each sequence to
     #   file <f>.
     '--tfile':ValuedParameter(Prefix='--',Name='tfile',Delimiter=' '),\
@@ -835,10 +835,10 @@ class Cmscore(CommandLineApplication):
     _parameters.update(_options)
     _command = "cmscore"
     _suppress_stderr=True
-    
+
     def getHelp(self):
         """Method that points to the Infernal documentation."""
-        
+
         help_str = \
         """
         See Infernal documentation at:
@@ -849,45 +849,45 @@ def getHelp(self):
 class Cmsearch(CommandLineApplication):
     """cmsearch application controller."""
     _options = {
-    
+
     # -o <f> Save the high-scoring alignments of hits to a file <f>. The default
     #   is to write them to standard output.
     '-o':ValuedParameter(Prefix='-',Name='o',Delimiter=' '),\
-    
+
     # -g <f> Turn on the 'glocal' alignment algorithm, local with respect to the
     #   target database, and global with respect to the model. By default, the
     #   local alignment algorithm is used which is local with respect to both
     #   the target sequence and the model.
     '-g':ValuedParameter(Prefix='-',Name='g',Delimiter=' '),\
-    
+
     # -p Append posterior probabilities to alignments of hits.
     '-p':FlagParameter(Prefix='-',Name='p'),\
-    
+
     # -x Annotate non-compensatory basepairs and basepairs that include a gap in
     #   the left and/or right half of the pair with x's in the alignments of
     #   hits.
     '-x':FlagParameter(Prefix='-',Name='x'),\
-    
+
     # -Z <x> Calculate E-values as if the target database size was <x> megabases
     #   (Mb). Ignore the actual size of the database. This option is only valid
     #   if the CM file has been calibrated. Warning: the predictions for timings
     #   and survival fractions will be calculated as if the database was of size
     #   <x> Mb, which means they will be inaccurate.
     '-Z':ValuedParameter(Prefix='-',Name='Z',Delimiter=' '),\
-    
+
     # --toponly Only search the top (Watson) strand of the sequences in seqfile.
     #   By default, both strands are searched.
     '--toponly':FlagParameter(Prefix='--',Name='toponly'),\
-    
+
     # --bottomonly Only search the bottom (Crick) strand of the sequences in
     #   seqfile. By default, both strands are searched.
     '--bottomonly':FlagParameter(Prefix='--',Name='bottomonly'),\
-    
+
     # --forecast <n> Predict the running time of the search with provided files
     #   and options and exit, DO NOT perform the search. This option is only
     #   available with calibrated CM files.
     '--forecast':ValuedParameter(Prefix='--',Name='forecast',Delimiter=' '),\
-    
+
     # --informat <s> Assert that the input seqfile is in format <s>. Do not run
     #   Babelfish format autodection. This increases the reliability of the
     #   program somewhat, because the Babelfish can make mistakes; particularly
@@ -895,94 +895,94 @@ class Cmsearch(CommandLineApplication):
     #   case-insensitive. Acceptable formats are: FASTA, EMBL, UNIPROT, GENBANK,
     #   and DDBJ. <s> is case-insensitive.
     '--informat':ValuedParameter(Prefix='--',Name='informat',Delimiter=' '),\
-    
+
     # --mxsize <x> Set the maximum allowable DP matrix size to <x> megabytes.
     '--mxsize':ValuedParameter(Prefix='--',Name='mxsize',Delimiter=' '),\
-    
+
     # --mpi Run as an MPI parallel program.
     '--mpi':FlagParameter(Prefix='--',Name='mpi'),\
-    
+
     # Expert Options
-    
+
     # --inside Use the Inside algorithm for the final round of searching. This
     #   is true by default.
     '--inside':FlagParameter(Prefix='--',Name='inside'),\
-    
+
     # --cyk Use the CYK algorithm for the final round of searching.
     '--cyk':FlagParameter(Prefix='--',Name='cyk'),\
-    
+
     # --viterbi Search only with an HMM. This is much faster but less sensitive
     #   than a CM search. Use the Viterbi algorithm for the HMM search.
     '--viterbi':FlagParameter(Prefix='--',Name='viterbi'),\
-    
+
     # --forward Search only with an HMM. This is much faster but less sensitive
     #   than a CM search. Use the Forward algorithm for the HMM search.
     '--forward':FlagParameter(Prefix='--',Name='forward'),\
-    
+
     # -E <x> Set the E-value cutoff for the per-sequence/strand ranked hit list
     #   to <x>, where <x> is a positive real number.
     '-E':ValuedParameter(Prefix='-',Name='E',Delimiter=' '),\
-    
+
     # -T <x> Set the bit score cutoff for the per-sequence ranked hit list to
     #   <x>, where <x> is a positive real number.
     '-T':ValuedParameter(Prefix='-',Name='T',Delimiter=' '),\
-    
+
     # --nc Set the bit score cutoff as the NC cutoff value used by Rfam curators
     #   as the noise cutoff score.
     '--nc':FlagParameter(Prefix='--',Name='nc'),\
-    
+
     # --ga Set the bit score cutoff as the GA cutoff value used by Rfam curators
     #   as the gathering threshold.
     '--ga':FlagParameter(Prefix='--',Name='ga'),\
-    
+
     # --tc Set the bit score cutoff as the TC cutoff value used by Rfam curators
     #   as the trusted cutoff.
     '--tc':FlagParameter(Prefix='--',Name='tc'),\
-    
+
     # --no-qdb Do not use query-dependent banding (QDB) for the final round of
     #   search.
     '--no-qdb':FlagParameter(Prefix='--',Name='no-qdb'),\
-    
+
     # --beta " <x>" For query-dependent banding (QDB) during the final round of
     #   search, set the beta parameter to <x> where <x> is any positive real
     #   number less than 1.0.
     '--beta':ValuedParameter(Prefix='--',Name='beta',Delimiter=' '),\
-    
+
     # --hbanded Use HMM bands to accelerate the final round of search.
     #   Constraints for the CM search are derived from posterior probabilities
     #   from an HMM. This is an experimental option and it is not recommended
     #   for use unless you know exactly what you're doing.
     '--hbanded':FlagParameter(Prefix='--',Name='hbanded'),\
-    
+
     # --tau <x> Set the tail loss probability during HMM band calculation to
     #   <x>.
     '--tau':ValuedParameter(Prefix='--',Name='tau',Delimiter=' '),\
-    
+
     # --fil-no-hmm Turn the HMM filter off.
     '--fil-no-hmm':FlagParameter(Prefix='--',Name='fil-no-hmm'),\
-    
+
     # --fil-no-qdb Turn the QDB filter off.
     '--fil-no-qdb':FlagParameter(Prefix='--',Name='fil-no-qdb'),\
-    
+
     # --fil-beta For the QDB filter, set the beta parameter to <x> where <x> is
     #   any positive real number less than 1.0.
     '--fil-beta':FlagParameter(Prefix='--',Name='fil-beta'),\
-    
+
     # --fil-T-qdb <x> Set the bit score cutoff for the QDB filter round to <x>,
     #   where <x> is a positive real number.
     '--fil-T-qdb':ValuedParameter(Prefix='--',Name='fil-T-qdb',Delimiter=' '),\
-    
+
     # --fil-T-hmm <x> Set the bit score cutoff for the HMM filter round to <x>,
     #   where <x> is a positive real number.
     '--fil-T-hmm':ValuedParameter(Prefix='--',Name='fil-T-hmm',Delimiter=' '),\
-    
+
     # --fil-E-qdb <x> Set the E-value cutoff for the QDB filter round. <x>,
     #   where <x> is a positive real number. Hits with E-values better than
     #   (less than) or equal to this threshold will survive and be passed to the
     #   final round. This option is only available if the CM file has been
     #   calibrated.
     '--fil-E-qdb':ValuedParameter(Prefix='--',Name='fil-E-qdb',Delimiter=' '),\
-    
+
     # --fil-E-hmm <x> Set the E-value cutoff for the HMM filter round. <x>,
     #   where <x> is a positive real number. Hits with E-values better than
     #   (less than) or equal to this threshold will survive and be passed to the
@@ -990,38 +990,38 @@ class Cmsearch(CommandLineApplication):
     #   to the final round of search. This option is only available if the CM
     #   file has been calibrated.
     '--fil-E-hmm':ValuedParameter(Prefix='--',Name='fil-E-hmm',Delimiter=' '),\
-    
+
     # --fil-Smax-hmm <x> Set the maximum predicted survival fraction for an HMM
     #   filter as <x>, where <x> is a positive real number less than 1.0.
     '--fil-Smax-hmm':ValuedParameter(Prefix='--',Name='fil-Smax-hmm',\
         Delimiter=' '),\
-    
+
     # --noalign Do not calculate and print alignments of each hit, only print
     #   locations and scores.
     '--noalign':FlagParameter(Prefix='--',Name='noalign'),\
-    
+
     # --aln-hbanded Use HMM bands to accelerate alignment during the hit
     #   alignment stage.
     '--aln-hbanded':FlagParameter(Prefix='--',Name='aln-hbanded'),\
-    
+
     # --aln-optacc Calculate alignments of hits from final round of search using
     #   the optimal accuracy algorithm which computes the alignment that
     #   maximizes the summed posterior probability of all aligned residues given
     #   the model, which can be different from the highest scoring one.
     '--aln-optacc':FlagParameter(Prefix='--',Name='aln-optacc'),\
-    
+
     # --tabfile <f> Create a new output file <f> and print tabular results to
     #   it.
     '--tabfile':ValuedParameter(Prefix='--',Name='tabfile',Delimiter=' '),\
-    
+
     # --gcfile <f> Create a new output file <f> and print statistics of the GC
     #   content of the sequences in seqfile to it.
     '--gcfile':ValuedParameter(Prefix='--',Name='gcfile',Delimiter=' '),\
-    
+
     # --rna Output the hit alignments as RNA sequences alignments. This is true
     #   by default.
     '--rna':FlagParameter(Prefix='--',Name='rna'),\
-    
+
     # --dna Output the hit alignments as DNA sequence alignments.
     '--dna':FlagParameter(Prefix='--',Name='dna'),\
 
@@ -1030,32 +1030,32 @@ class Cmsearch(CommandLineApplication):
     _parameters.update(_options)
     _command = "cmsearch"
     _suppress_stderr=True
-    
+
     def getHelp(self):
         """Method that points to the Infernal documentation."""
-        
+
         help_str = \
         """
         See Infernal documentation at:
         http://infernal.janelia.org/
         """
         return help_str
-    
+
     def _tabfile_out_filename(self):
-        
+
         if self.Parameters['--tabfile'].isOn():
             tabfile_filename = self._absolute(str(\
                 self.Parameters['--tabfile'].Value))
         else:
             raise ValueError, 'No tabfile output file specified.'
         return tabfile_filename
-    
+
     def _tempfile_as_multiline_string(self, data):
         """Write a multiline string to a temp file and return the filename.
 
             data: a multiline string to be written to a file.
 
-           * Note: the result will be the filename as a FilePath object 
+           * Note: the result will be the filename as a FilePath object
             (which is a string subclass).
 
         """
@@ -1064,132 +1064,132 @@ def _tempfile_as_multiline_string(self, data):
         data_file.write(data)
         data_file.close()
         return filename
-    
+
     def _get_result_paths(self,data):
         result = {}
         if self.Parameters['--tabfile'].isOn():
             out_name = self._tabfile_out_filename()
             result['SearchResults'] = ResultPath(Path=out_name,IsWritten=True)
-        
+
         return result
 
 class Cmstat(CommandLineApplication):
     """cmstat application controller."""
     _options = {
-    
+
     # -g Turn on the 'glocal' alignment algorithm, local with respect to the
     #   target database, and global with respect to the model. By default, the
     #   model is configured for local alignment which is local with respect to
     #   both the target sequence and the model.
     '-g':FlagParameter(Prefix='-',Name='g'),\
-    
+
     # -m print general statistics on the models in cmfile and the alignment it
     #   was built from.
     '-m':FlagParameter(Prefix='-',Name='m'),\
-    
+
     # -Z <x> Calculate E-values as if the target database size was <x> megabases
     #   (Mb). Ignore the actual size of the database. This option is only valid
     #   if the CM file has been calibrated.
     '-Z':ValuedParameter(Prefix='-',Name='Z',Delimiter=' '),\
-    
+
     # --all print all available statistics
     '--all':FlagParameter(Prefix='--',Name='all'),\
-    
+
     # --le print local E-value statistics. This option only works if cmfile has
     #   been calibrated with cmcalibrate.
     '--le':FlagParameter(Prefix='--',Name='le'),\
-    
+
     # --ge print glocal E-value statistics. This option only works if cmfile has
     #   been calibrated with cmcalibrate.
     '--ge':FlagParameter(Prefix='--',Name='ge'),\
-    
+
     # --beta <x> With the --search option set the beta parameter for the query-
     #   dependent banding algorithm stages to <x> Beta is the probability mass
     #   considered negligible during band calculation. The default is 1E-7.
     '--beta':ValuedParameter(Prefix='--',Name='beta',Delimiter=' '),\
-    
+
     # --qdbfile <f> Save the query-dependent bands (QDBs) for each state to file
     #   <f>
     '--qdbfile':ValuedParameter(Prefix='--',Name='qdbfile',Delimiter=' '),\
-    
+
     # Expert Options
-    
+
     # --lfi Print the HMM filter thresholds for the range of relevant CM bit
     #   score cutoffs for searches with locally configured models using the
     #   Inside algorithm.
     '--lfi':FlagParameter(Prefix='--',Name='lfi'),\
-    
+
     # --gfi Print the HMM filter thresholds for the range of relevant CM bit
     #   score cutoffs for searches with globally configured models using the
     #   Inside algorithm.
     '--gfi':FlagParameter(Prefix='--',Name='gfi'),\
-    
+
     # --lfc Print the HMM filter thresholds for the range of relevant CM bit
     #   score cutoffs for searches with locally configured models using the CYK
     #   algorithm.
     '--lfc':FlagParameter(Prefix='--',Name='lfc'),\
-    
+
     # --gfc Print the HMM filter thresholds for the range of relevant CM bit
     #   score cutoffs for searches with globally configured models using the CYK
     #   algorithm.
     '--gfc':FlagParameter(Prefix='--',Name='gfc'),\
-    
+
     # -E <x> Print filter threshold statistics for an HMM filter if a final CM
     #   E-value cutoff of <x> were to be used for a run of cmsearch on 1 MB of
     #   sequence.
     '-E':ValuedParameter(Prefix='-',Name='E',Delimiter=' '),\
-    
+
     # -T <x> Print filter threshold statistics for an HMM filter if a final CM
     #   bit score cutoff of <x> were to be used for a run of cmsearch.
     '-T':ValuedParameter(Prefix='-',Name='T',Delimiter=' '),\
-    
+
     # --nc Print filter threshold statistics for an HMM filter if a CM bit score
     #   cutoff equal to the Rfam NC cutoff were to be used for a run of
     #   cmsearch.
     '--nc':FlagParameter(Prefix='--',Name='nc'),\
-    
+
     # --ga Print filter threshold statistics for an HMM filter if a CM bit score
     #   cutoff of Rfam GA cutoff value were to be used for a run of cmsearch.
     '--ga':FlagParameter(Prefix='--',Name='ga'),\
-    
+
     # --tc Print filter threshold statistics for an HMM filter if a CM bit score
     #   cutoff equal to the Rfam TC cutoff value were to be used for a run of
     #   cmsearch.
     '--tc':FlagParameter(Prefix='--',Name='tc'),\
-    
+
     # --seqfile <x> With the -E option, use the database size of the database in
     #   <x> instead of the default database size of 1 MB.
     '--seqfile':ValuedParameter(Prefix='--',Name='seqfile',Delimiter=' '),\
-    
+
     # --toponly In combination with --seqfile <x> option, only consider the top
     #   strand of the database in <x> instead of both strands. --search perform
     #   an experiment to determine how fast the CM(s) can search with different
     #   search algorithms.
     '--toponly':FlagParameter(Prefix='--',Name='toponly'),\
-    
+
     # --cmL <n> With the --search option set the length of sequence to search
     #   with CM algorithms as <n> residues. By default, <n> is 1000.
     '--cmL':ValuedParameter(Prefix='--',Name='cmL',Delimiter=' '),\
-    
+
     # --hmmL <n> With the --search option set the length of sequence to search
     #   with HMM algorithms as <n> residues. By default, <n> is 100,000.
     '--hmmL':ValuedParameter(Prefix='--',Name='hmmL',Delimiter=' '),\
-    
+
     # --efile <f> Save a plot of cmsearch HMM filter E value cutoffs versus CM
     #   E-value cutoffs in xmgrace format to file <f>. This option must be used
     #   in combination with --lfi, --gfi, --lfc or --gfc.
     '--efile':ValuedParameter(Prefix='--',Name='efile',Delimiter=' '),\
-    
+
     # --bfile <f> Save a plot of cmsearch HMM bit score cutoffs versus CM bit
     #   score cutoffs in xmgrace format to file <f>. This option must be used in
     #   combination with --lfi, --gfi, --lfc or --gfc.
     '--bfile':ValuedParameter(Prefix='--',Name='bfile',Delimiter=' '),\
-    
+
     # --sfile <f> Save a plot of cmsearch predicted survival fraction from the
     #   HMM filter versus CM E value cutoff in xmgrace format to file <f>. This
     #   option must be used in combination with --lfi, --gfi, --lfc or --gfc.
     '--sfile':ValuedParameter(Prefix='--',Name='sfile',Delimiter=' '),\
-    
+
     # --xfile <f> Save a plot of 'xhmm' versus CM E value cutoff in xmgrace
     #   format to file <f> 'xhmm' is the ratio of the number of dynamic
     #   programming calculations predicted to be required for the HMM filter and
@@ -1197,12 +1197,12 @@ class Cmstat(CommandLineApplication):
     #   programming calculations for the filter alone. This option must be
     #   used in combination with --lfi, --gfi, --lfc or --gfc.
     '--xfile':ValuedParameter(Prefix='--',Name='xfile',Delimiter=' '),\
-    
+
     # --afile <f> Save a plot of the predicted acceleration for an HMM filtered
     #   search versus CM E value cutoff in xmgrace format to file <f>. This
     #   option must be used in combination with --lfi, --gfi, --lfc or --gfc.
     '--afile':ValuedParameter(Prefix='--',Name='afile',Delimiter=' '),\
-    
+
     # --bits With --efile, --sfile, --xfile, and --afile use CM bit score
     #   cutoffs instead of CM E value cutoffs for the x-axis values of the plot.
     '--bits':FlagParameter(Prefix='--',Name='bits'),\
@@ -1212,10 +1212,10 @@ class Cmstat(CommandLineApplication):
     _parameters.update(_options)
     _command = "cmstat"
     _suppress_stderr=True
-    
+
     def getHelp(self):
         """Method that points to the Infernal documentation."""
-        
+
         help_str = \
         """
         See Infernal documentation at:
@@ -1226,7 +1226,7 @@ def getHelp(self):
 def cmbuild_from_alignment(aln, structure_string, refine=False, \
     return_alignment=False,params=None):
     """Uses cmbuild to build a CM file given an alignment and structure string.
-    
+
         - aln: an Alignment object or something that can be used to construct
             one.  All sequences must be the same length.
         - structure_string: vienna structure string representing the consensus
@@ -1236,7 +1236,7 @@ def cmbuild_from_alignment(aln, structure_string, refine=False, \
             (Default=False)
         - return_alignment: Return (in Stockholm format) alignment file used to
             construct the CM file.  This will either be the original alignment
-            and structure string passed in, or the refined alignment if --refine 
+            and structure string passed in, or the refined alignment if --refine
             was used. (Default=False)
             - Note.  This will be a string that can either be written to a file
                 or parsed.
@@ -1250,26 +1250,26 @@ def cmbuild_from_alignment(aln, structure_string, refine=False, \
     #Make new Cmbuild app instance.
     app = Cmbuild(InputHandler='_input_as_paths',WorkingDir='/tmp',\
         params=params)
-    
+
     #turn on refine flag if True.
     if refine:
         _, tmp_file = mkstemp(dir=app.WorkingDir)
         app.Parameters['--refine'].on(tmp_file)
-        
+
     #Get alignment in Stockholm format
     aln_file_string = stockholm_from_alignment(aln,GC_annotation=struct_dict)
-    
+
     #get path to alignment filename
     aln_path = app._input_as_multiline_string(aln_file_string)
     cm_path = aln_path.split('.txt')[0]+'.cm'
     app.Parameters['-n'].on(cm_path)
-    
+
     filepaths = [cm_path,aln_path]
-    
+
     res = app(filepaths)
-    
+
     cm_file = res['CmFile'].read()
-    
+
     if return_alignment:
         #If alignment was refined, return refined alignment and structure,
         # otherwise return original alignment and structure.
@@ -1286,9 +1286,9 @@ def cmbuild_from_alignment(aln, structure_string, refine=False, \
 def cmbuild_from_file(stockholm_file_path, refine=False,return_alignment=False,\
     params=None):
     """Uses cmbuild to build a CM file given a stockholm file.
-    
+
         - stockholm_file_path: a path to a stockholm file.  This file should
-            contain a multiple sequence alignment formated in Stockholm format. 
+            contain a multiple sequence alignment formated in Stockholm format.
             This must contain a sequence structure line:
                 #=GC SS_cons <structure string>
         - refine: refine the alignment and realign before building the cm.
@@ -1302,24 +1302,24 @@ def cmbuild_from_file(stockholm_file_path, refine=False,return_alignment=False,\
     info, aln, structure_string = \
         list(MinimalRfamParser(open(stockholm_file_path,'U'),\
             seq_constructor=ChangedSequence))[0]
-    
+
     #call cmbuild_from_alignment.
     res = cmbuild_from_alignment(aln, structure_string, refine=refine, \
         return_alignment=return_alignment,params=params)
     return res
 
-def cmalign_from_alignment(aln, structure_string, seqs, moltype,\
+def cmalign_from_alignment(aln, structure_string, seqs, moltype=DNA,\
     include_aln=True,refine=False, return_stdout=False,params=None,\
     cmbuild_params=None):
     """Uses cmbuild to build a CM file, then cmalign to build an alignment.
-    
+
         - aln: an Alignment object or something that can be used to construct
             one.  All sequences must be the same length.
         - structure_string: vienna structure string representing the consensus
             stucture for the sequences in aln.  Must be the same length as the
             alignment.
         - seqs: SequenceCollection object or something that can be used to
-            construct one, containing unaligned sequences that are to be aligned 
+            construct one, containing unaligned sequences that are to be aligned
             to the aligned sequences in aln.
         - moltype: Cogent moltype object.  Must be RNA or DNA.
         - include_aln: Boolean to include sequences in aln in final alignment.
@@ -1336,65 +1336,65 @@ def cmalign_from_alignment(aln, structure_string, seqs, moltype,\
     int_map, int_keys = seqs.getIntMap()
     #Create SequenceCollection from int_map.
     int_map = SequenceCollection(int_map,MolType=moltype)
-    
+
     cm_file, aln_file_string = cmbuild_from_alignment(aln, structure_string,\
         refine=refine,return_alignment=True,params=cmbuild_params)
-    
+
     if params is None:
-        params = {}    
+        params = {}
     params.update({MOLTYPE_MAP[moltype]:True})
-    
+
     app = Cmalign(InputHandler='_input_as_paths',WorkingDir='/tmp',\
         params=params)
     app.Parameters['--informat'].on('FASTA')
-    
+
     #files to remove that aren't cleaned up by ResultPath object
-    to_remove = []    
+    to_remove = []
     #turn on --withali flag if True.
     if include_aln:
         app.Parameters['--withali'].on(\
             app._tempfile_as_multiline_string(aln_file_string))
         #remove this file at end
         to_remove.append(app.Parameters['--withali'].Value)
-    
+
     seqs_path = app._input_as_multiline_string(int_map.toFasta())
     cm_path = app._tempfile_as_multiline_string(cm_file)
-    
+
     #add cm_path to to_remove
     to_remove.append(cm_path)
     paths = [cm_path,seqs_path]
 
     _, tmp_file = mkstemp(dir=app.WorkingDir)
     app.Parameters['-o'].on(tmp_file)
-    
+
     res = app(paths)
-    
+
     info, aligned, struct_string = \
         list(MinimalRfamParser(res['Alignment'].readlines(),\
             seq_constructor=SEQ_CONSTRUCTOR_MAP[moltype]))[0]
-    
+
     #Make new dict mapping original IDs
     new_alignment={}
     for k,v in aligned.NamedSeqs.items():
         new_alignment[int_keys.get(k,k)]=v
     #Create an Alignment object from alignment dict
     new_alignment = Alignment(new_alignment,MolType=moltype)
-    
+
     std_out = res['StdOut'].read()
     #clean up files
     res.cleanUp()
     for f in to_remove: remove(f)
-    
+
     if return_stdout:
         return new_alignment, struct_string, std_out
     else:
         return new_alignment, struct_string
-    
 
-def cmalign_from_file(cm_file_path, seqs, moltype, alignment_file_path=None,\
+
+def cmalign_from_file(cm_file_path, seqs, moltype=DNA, alignment_file_path=None,\
     include_aln=False,return_stdout=False,params=None):
     """Uses cmalign to align seqs to alignment in cm_file_path.
-        
+
         - cm_file_path: path to the file created by cmbuild, containing aligned
             sequences. This will be used to align sequences in seqs.
         - seqs: unaligned sequendes that are to be aligned to the sequences in
@@ -1414,39 +1414,39 @@ def cmalign_from_file(cm_file_path, seqs, moltype, alignment_file_path=None,\
     """
     #NOTE: Must degap seqs or Infernal well seg fault!
     seqs = SequenceCollection(seqs,MolType=moltype).degap()
-    
+
     #Create mapping between abbreviated IDs and full IDs
     int_map, int_keys = seqs.getIntMap()
     #Create SequenceCollection from int_map.
     int_map = SequenceCollection(int_map,MolType=moltype)
-    
+
     if params is None:
         params = {}
     params.update({MOLTYPE_MAP[moltype]:True})
-    
+
     app = Cmalign(InputHandler='_input_as_paths',WorkingDir='/tmp',\
         params=params)
     app.Parameters['--informat'].on('FASTA')
-        
+
     #turn on --withali flag if True.
     if include_aln:
         if alignment_file_path is None:
             raise DataError, """Must have path to alignment file used to build CM if include_aln=True."""
         else:
             app.Parameters['--withali'].on(alignment_file_path)
-                
+
     seqs_path = app._input_as_multiline_string(int_map.toFasta())
     paths = [cm_file_path,seqs_path]
-    
+
     _, tmp_file = mkstemp(dir=app.WorkingDir)
     app.Parameters['-o'].on(tmp_file)
     res = app(paths)
-    
+
     info, aligned, struct_string = \
         list(MinimalRfamParser(res['Alignment'].readlines(),\
             seq_constructor=SEQ_CONSTRUCTOR_MAP[moltype]))[0]
-    
-    
+
+
     #Make new dict mapping original IDs
     new_alignment={}
     for k,v in aligned.items():
@@ -1459,11 +1459,11 @@ def cmalign_from_file(cm_file_path, seqs, moltype, alignment_file_path=None,\
         return new_alignment, struct_string, std_out
     else:
         return new_alignment, struct_string
-    
+
 def cmsearch_from_alignment(aln, structure_string, seqs, moltype, cutoff=0.0,\
     refine=False,params=None):
     """Uses cmbuild to build a CM file, then cmsearch to find homologs.
-    
+
         - aln: an Alignment object or something that can be used to construct
             one.  All sequences must be the same length.
         - structure_string: vienna structure string representing the consensus
@@ -1486,40 +1486,40 @@ def cmsearch_from_alignment(aln, structure_string, seqs, moltype, cutoff=0.0,\
     int_map, int_keys = seqs.getIntMap()
     #Create SequenceCollection from int_map.
     int_map = SequenceCollection(int_map,MolType=moltype)
-    
+
     cm_file, aln_file_string = cmbuild_from_alignment(aln, structure_string,\
         refine=refine,return_alignment=True)
-    
+
     app = Cmsearch(InputHandler='_input_as_paths',WorkingDir='/tmp',\
         params=params)
     app.Parameters['--informat'].on('FASTA')
     app.Parameters['-T'].on(cutoff)
-    
+
     to_remove = []
-    
+
     seqs_path = app._input_as_multiline_string(int_map.toFasta())
     cm_path = app._tempfile_as_multiline_string(cm_file)
     paths = [cm_path,seqs_path]
     to_remove.append(cm_path)
-    
+
     _, tmp_file = mkstemp(dir=app.WorkingDir)
     app.Parameters['--tabfile'].on(tmp_file)
     res = app(paths)
-    
+
     search_results = list(CmsearchParser(res['SearchResults'].readlines()))
     if search_results:
         for i,line in enumerate(search_results):
             label = line[1]
             search_results[i][1]=int_keys.get(label,label)
-    
+
     res.cleanUp()
     for f in to_remove:remove(f)
-    
+
     return search_results
 
 def cmsearch_from_file(cm_file_path, seqs, moltype, cutoff=0.0, params=None):
     """Uses cmbuild to build a CM file, then cmsearch to find homologs.
-    
+
         - cm_file_path: path to the file created by cmbuild, containing aligned
             sequences. This will be used to search sequences in seqs.
         - seqs: SequenceCollection object or something that can be used to
@@ -1537,28 +1537,27 @@ def cmsearch_from_file(cm_file_path, seqs, moltype, cutoff=0.0, params=None):
     int_map, int_keys = seqs.getIntMap()
     #Create SequenceCollection from int_map.
     int_map = SequenceCollection(int_map,MolType=moltype)
-    
+
     app = Cmsearch(InputHandler='_input_as_paths',WorkingDir='/tmp',\
         params=params)
     app.Parameters['--informat'].on('FASTA')
     app.Parameters['-T'].on(cutoff)
-    
+
     seqs_path = app._input_as_multiline_string(int_map.toFasta())
 
     paths = [cm_file_path,seqs_path]
-    
+
     _, tmp_file = mkstemp(dir=app.WorkingDir)
     app.Parameters['--tabfile'].on(tmp_file)
     res = app(paths)
-    
+
     search_results = list(CmsearchParser(res['SearchResults'].readlines()))
-    
-    if search_results:    
+
+    if search_results:
         for i,line in enumerate(search_results):
             label = line[1]
             search_results[i][1]=int_keys.get(label,label)
-    
+
     res.cleanUp()
 
     return search_results
-