From f1db9b1fa6d89ac7e7770ec4f5a57e1ddff61059 Mon Sep 17 00:00:00 2001 From: mgaynor1 Date: Wed, 17 Jul 2024 20:00:03 -0400 Subject: [PATCH] Updated citation! --- README.md | 6 ++++-- docs/authors.html | 12 +++++++----- docs/index.html | 4 ++-- docs/pkgdown.yml | 2 +- docs/search.json | 2 +- inst/CITATION | 8 +++++--- 6 files changed, 20 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index afb8faf..cd3333c 100644 --- a/README.md +++ b/README.md @@ -48,10 +48,12 @@ For implementation, see our [Basic Example](https://mlgaynor.com/nQuack/articles ## References -Gaynor ML, Landis JB, O’Connor TK, Laport RG, Doyle JJ, Soltis DE, Ponciano JM, and Soltis PS. nQuack: An R package for predicting ploidy level from sequence data using site-based heterozygosity. In Review. See preprint on bioRxiv, [doi.org: 10.1101/2024.02.12.579894](https://www.biorxiv.org/content/10.1101/2024.02.12.579894v1) +Gaynor ML, Landis JB, O’Connor TK, Laport RG, Doyle JJ, Soltis DE, Ponciano JM, and Soltis PS. 2024. nQuack: An R package for predicting ploidy level from sequence data using site-based heterozygosity. *Applications in Plant Sciences* 12(4):e11606. [doi: 10.1002/aps3.11606](https://www.doi.org/10.1002/aps3.11606) ## Up Next: -- If you have sequence data with known ploidal level for a mixed-ploidy system, let us know. We would love to collaborate with you. +- If you have sequence data with known ploidal level for a mixed-ploidy system, let us know. We would love to collaborate with you. To be included in v2.0, please send me an email at shellyleegaynor at gmail. + + diff --git a/docs/authors.html b/docs/authors.html index 193833b..e9f4b6a 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -69,15 +69,17 @@

Citation

Gaynor M, Landis J, O'Connor T, Laport R, Doyle J, Soltis D, Ponciano J, Soltis P (2024). “nQuack: An R package for predicting ploidal level from sequence data using site-based heterozygosity.” -bioRxiv. -doi:10.1101/2024.02.12.579894. +Applications in Plant Sciences, 12(4), e11606. +doi:10.1002/aps3.11606.

@Article{,
   title = {nQuack: An R package for predicting ploidal level from sequence data using site-based heterozygosity},
   year = {2024},
-  journal = {bioRxiv},
-  publisher = {Cold Spring Harbor Laboratory},
-  doi = {10.1101/2024.02.12.579894},
+  journal = {Applications in Plant Sciences},
+  doi = {10.1002/aps3.11606},
+  pages = {e11606},
+  volume = {12},
+  number = {4},
   author = {Michelle L. Gaynor and Jacob B. Landis and Timothy K. O'Connor and Robert G. Laport and Jeff J. Doyle and Douglas E. Soltis and José Miguel Ponciano and Pamela S. Soltis},
 }
diff --git a/docs/index.html b/docs/index.html index 8cecee5..aeae845 100644 --- a/docs/index.html +++ b/docs/index.html @@ -108,13 +108,13 @@

Warning: samtools must be local!

References

-

Gaynor ML, Landis JB, O’Connor TK, Laport RG, Doyle JJ, Soltis DE, Ponciano JM, and Soltis PS. nQuack: An R package for predicting ploidy level from sequence data using site-based heterozygosity. In Review. See preprint on bioRxiv, doi.org: 10.1101/2024.02.12.579894

+

Gaynor ML, Landis JB, O’Connor TK, Laport RG, Doyle JJ, Soltis DE, Ponciano JM, and Soltis PS. 2024. nQuack: An R package for predicting ploidy level from sequence data using site-based heterozygosity. Applications in Plant Sciences 12(4):e11606. doi: 10.1002/aps3.11606

Up Next:

    -
  • If you have sequence data with known ploidal level for a mixed-ploidy system, let us know. We would love to collaborate with you.
  • +
  • If you have sequence data with known ploidal level for a mixed-ploidy system, let us know. We would love to collaborate with you. To be included in v2.0, please send me an email at shellyleegaynor at gmail.
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 0a2df69..4444271 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -7,7 +7,7 @@ articles: ModelOptions: ModelOptions.html Outliers: Outliers.html SimulateData: SimulateData.html -last_built: 2024-05-02T20:00Z +last_built: 2024-07-17T23:58Z urls: reference: http://mlgaynor.com/nQuack/reference article: http://mlgaynor.com/nQuack/articles diff --git a/docs/search.json b/docs/search.json index 8449a31..f7b6d1e 100644 --- a/docs/search.json +++ b/docs/search.json @@ -1 +1 @@ -[{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"data-description","dir":"Articles","previous_headings":"","what":"Data description","title":"Basic Example","text":"“inst/extdata/” folder found github, provide three samples Galax urceolata collected 2021 part dissertation. samples published, submitted NCBI-SRA near future. data generated target enrichment species-specific probes. samples include diploid (MLG013), triploid (MLG015), tetraploid (MLG014). raw files created following preprocessing steps outlined ‘Data Preparation’.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"load-packages","dir":"Articles","previous_headings":"","what":"Load packages","title":"Basic Example","text":"tutorial requires nQuack dplyr.","code":"library(nQuack) library(dplyr) library(kableExtra)"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"prepare-data","dir":"Articles","previous_headings":"Data preparation","what":"01. Prepare data","title":"Basic Example","text":"Warning, step takes results file slightly larger input BAM. time taken step can likely reduce future, however, keep file large conducting true filtering steps stage. 
suggest running one CPU night cluster, though may take multiple days finish depending data.","code":"# Set in and out paths of files inpath <- \"../inst/extdata/01_raw/\" outpath <- \"../inst/extdata/02_prepared/\" # List files in the inpath and remove their ending filelist <- list.files(path = inpath, pattern = \"*.bam\" ) filelist <- gsub(\".bam\", \"\", filelist) for( i in 1:length(filelist)){ prepare_data(filelist[i], inpath, outpath) }"},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"process-data-","dir":"Articles","previous_headings":"Data preparation","what":"02. Process data.","title":"Basic Example","text":"Next , filter data file. step fast (2 - 3 seconds per sample) can run locally single CPU. following filtering approach found nQuire: minimum depth 10, allele trunctation minimum 0.15, allele truncation maximum 0.85. filtering like ? accurate model sample set filtering approach using normal distribution implementation alpha free uniform mixture. model filtering approach led 97% accuracy 186 samples ploidal level correctly assigned.","code":"inpathtext <- \"../inst/extdata/02_prepared/\" newfilelist <- list.files(path = inpathtext, pattern = \"*.txt\" ) for(i in 1:length(newfilelist)){ samp <- newfilelist[i] temp <- process_data(paste0(inpathtext, samp), min.depth = 10, max.depth.quantile.prob = 1, error = 0.01, trunc = c(0.15,0.85)) write.csv(temp, file = paste0(\"../inst/extdata/03_processed/\", gsub(\".txt\", \"\", samp), \".csv\"), row.names = FALSE) }"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"explore-all-models","dir":"Articles","previous_headings":"Model inference","what":"Explore all models","title":"Basic Example","text":"Now ready predict ploidal level samples. using method unexplored sample set, suggest examine data least 18 model types three distributions, total 54 models. functions can run multiple cores. 
run examples, took us 1.46 - 2.09 seconds run quackNormal(), 6.41 - 23.16 min run quackBeta(), 3.12 - 27.85 min run quackBetaBinom(). total, took 9.54 46.15 min run models sample.","code":"samples <- c(\"MLG013\", \"MLG014\", \"MLG015\") for(i in 1:length(samples)){ temp <- as.matrix(read.csv(paste0(\"../inst/extdata/03_processed/\", samples[i], \".csv\"))) out1 <- quackNormal(xm = temp, samplename = samples[i], cores = 10, parallel = FALSE) out2 <- quackBeta(xm = temp, samplename = samples[i], cores = 10, parallel = FALSE) out3 <- quackBetaBinom(xm = temp, samplename = samples[i], cores = 10, parallel = FALSE) allout <- rbind(out1, out2, out3) write.csv(allout, file = paste0(\"../inst/extdata/04_output/\", samples[i], \".csv\"), row.names = FALSE) }"},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"model-interpretation","dir":"Articles","previous_headings":"Model inference","what":"Model interpretation","title":"Basic Example","text":"Using function quackit(), can easily interpret model output. selecting models based BIC score considering diploid, triploid, tetraploid mixtures. output sample, can pair outputs key contains sample names ploidal level. identify accurate model data, tally accuracy handy dplyr functions. sample set, know normal distribution alpha free uniform mixture accurate model. 
now ?","code":"inpathtext <- \"../inst/extdata/04_output/\" samples <- c(\"MLG013\", \"MLG014\", \"MLG015\") for(i in 1:length(samples)){ temp <- read.csv(paste0(inpathtext, samples[i], \".csv\")) summary <- quackit(model_out = temp, summary_statistic = \"BIC\", mixtures = c(\"diploid\", \"triploid\", \"tetraploid\")) write.csv(summary, file = paste0(\"../inst/extdata/05_interpret/\", samples[i], \".csv\"), row.names = FALSE) } # Create key key <- data.frame(sample = c(\"MLG013\", \"MLG014\", \"MLG015\"), ploidal.level = c(\"diploid\", \"tetraploid\", \"triploid\")) # Read in quackit() output dfs <- lapply(list.files(\"../inst/extdata/05_interpret/\", full.names = TRUE ), read.csv) alloutput <- do.call(rbind, dfs) # Combined alloutputcombo <- dplyr::left_join(alloutput, key) # Check the accuracy alloutputcombo <- alloutputcombo %>% dplyr::mutate(accuracy = ifelse(winnerBIC == ploidal.level, 1, 0)) ## What distribution and model type should we use? sumcheck <- alloutputcombo %>% group_by(Distribution, Type) %>% summarize(total = n(), correct = sum(accuracy)) kbl(sumcheck) %>% kable_paper(\"hover\", full_width = F)"},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"running-only-the-best-model","dir":"Articles","previous_headings":"Model inference","what":"Running only the best model","title":"Basic Example","text":"unknown samples, can use best model predict ploidal level bestquack() function.","code":"samples <- c(\"MLG013\", \"MLG014\", \"MLG015\") out <- c() for(i in 1:length(samples)){ temp <- as.matrix(read.csv(paste0(\"../inst/extdata/03_processed/\", samples[i], \".csv\"))) out[[i]] <- bestquack(temp, distribution = \"normal\", type = \"fixed\", uniform = 1, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\"), samplename = samples[i]) }"},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"bootstrap-replicates","dir":"Articles","previous_headings":"Model inference","what":"Bootstrap replicates","title":"Basic 
Example","text":"also provide function run bootstrap replicates best model. Warning, print lot ducks. output function includes two rows, first show best model original data set, second tallies bootstrap replicates best model. diploid, see best model replicates diploid! means 100% bootstrap support. can use replicates identify model shouldn’t trusted. example, model known missassign ploidal level MLG129, tetraploid. found 4/1000 bootstrap replicates support correct model. Suggesting deviation one mixture/ploidal level may indicate untrustworthy model. However, likely varies across models sample sets.","code":"samples <- c(\"MLG013\", \"MLG014\", \"MLG015\") bout <- c() for(i in 1:length(samples)){ temp <- as.matrix(read.csv(paste0(\"../inst/extdata/03_processed/\", samples[i], \".csv\"))) bout[[i]] <- quackNboots(temp, nboots = 100, distribution = \"normal\", type = \"fixed\", uniform = 1, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\"), samplename = samples[i]) } write.csv(bout[[1]], file = \"../inst/extdata/06_boots/MLG013-boots.csv\", row.names = FALSE) write.csv(bout[[2]], file = \"../inst/extdata/06_boots/MLG014-boots.csv\", row.names = FALSE) write.csv(bout[[3]], file = \"../inst/extdata/06_boots/MLG015-boots.csv\", row.names = FALSE) MLG013boot <- read.csv(\"../inst/extdata/06_boots/MLG013-boots.csv\") MLG013boot ## diploid triploid tetraploid sample ## 1 1 NA NA MLG013 ## 2 100 NA NA MLG013 temp <- as.matrix(read.csv(\"../inst/extdata/06_boots/MLG129.csv\")) check <- quackNboots(temp, nboots = 1000, distribution = \"normal\", type = \"fixed\", uniform = 1, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\"), samplename = \"MLG129\") write.csv(check, file = \"../inst/extdata/06_boots/MLG129-boots.csv\", row.names = FALSE) MLG129boot <- read.csv(\"../inst/extdata/06_boots/MLG129-boots.csv\") MLG129boot ## diploid triploid tetraploid sample ## 1 1 NA NA MLG129 ## 2 996 NA 4 
MLG129"},{"path":"http://mlgaynor.com/nQuack/articles/BiasEstimation.html","id":"simulated-data","dir":"Articles","previous_headings":"","what":"Simulated Data","title":"Bias Estimation","text":"simulated 2000 data sets ploidal level. ploidal level, set simulated proportions, mean, variance equal mixture models starting parameters. simulated 25000 sites data set, 100 replicates per parameter set. included 20 parameter sets differed coverage. distribution coverage simulated data sets can seen :","code":""},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"align-to-a-reference-genome","dir":"Articles","previous_headings":"Preprocessing","what":"Align to a reference genome","title":"Data Preparation","text":"processing raw sequence data, align reads reference genome. use bwa-mem2 align reads. converted SAM files BAM files using samtools. bash script:","code":"# Load module module load bwa-mem2/2.2.1 module load samtools/1.15 # Index the reference bwa-mem2 index reference_genome.gz # Mapping ## -t Number of threads, here we run our mapping on 10 threads. ## -M Indexed reference genome. bwa-mem2 mem -t 10 -M reference_genome.gz sample_001_1.fastq sample_001_2.fastq > sample_001.sam # SAM to BAM samtools view -S -b sample_001.sam > sample_001.bam # SORT samtools sort sample_001.bam -o sample_001.bam"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"identify-repeat-regions","dir":"Articles","previous_headings":"Preprocessing > Remove repeats","what":"Identify repeat regions","title":"Data Preparation","text":"Prior running samples nQuack, suggest repeat regions removed. remove repeats, first identify . utilize repeat modeler repeat masker identify mask repeats. 
also make database mask repeats, used remove repeats samples alignment.","code":"## Load modules module load repeatmodeler/2.0 module load repeatmasker/4.1.1 # Set database name databasename=Species # Build database based on the reference genome BuildDatabase -name $databasename ReferenceGenome.fasta # Repeat Modeler ## -LTRStruct = runs the LTR structural discovery pipeline ( LTR_Harvest and LTR_retreiver ) ### and combine results with the RepeatScout/RECON pipeline. RepeatModeler -pa 36 -database $databasename -LTRStruct > out.log # Move and gzip the database created mv RM* 01_RepMod cd 01_RepMod tar cvzf RepMod_rounds.tar.gz round-* LTR* tmp* rm -r round-* LTR* tmp* gzip families* cd .. mv $databasename* 01_RepMod/ cp ReferenceGenome.fasta $databasename.fasta # Repeat Masker ## -pa # of threads ## -a return alignment ## -xsmall returns with masked lowercased ## -gff creates the gene feature finding formatted output ## -lib indicates the library, alternatively you could indicate the species ## -dir indicates output directory ## .fasta = reference genome RepeatMasker -pa 24 -a -xsmall -gff -lib 01_RepMod/consensi.fa.classified -dir 02_RepMask $databasename.fasta # Prepare for use mkdir 03_database cut -f1,4,5 02_RepMask/$databasename.fasta.out.gff | perl -pi -e 's/^#.*\\n//g' > 03_database/ref2_$databasename.gff.bed"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"remove-repeats-from-your-alignment","dir":"Articles","previous_headings":"Preprocessing > Remove repeats","what":"Remove repeats from your alignment","title":"Data Preparation","text":"Based bed file created , remove repeats alignment samtools function veiw.","code":"# Load modules module load samtools/1.12 # Make directories mkdir repeats_removed ## Remove Repeats samtools view sample_001.bam -b -h -o /dev/null -U repeats_removed/sample_001.bam -L 
03_database/ref2_$databasename.gff.bed"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"filter-low-quality","dir":"Articles","previous_headings":"Preprocessing","what":"Filter low quality","title":"Data Preparation","text":"remove regions low quality based MAPQ score. MAPQ equal -10*log-base-10(Pr(mapping position wrong)), rounded nearest integer (see ). example, wanted remove site 50% chance mapped wrong position, set filter 4. calculation MAPQ score depends alignment software ( see ), therefore difficult pinpoint score needed remove reads map multiple locations. take stringent approach remove sites 10% chance mapped wrong location set -q flag 10.","code":"ceiling(-10*log10(0.5)) #> [1] 4 # Load modules module load samtools/1.15 # Make directories mkdir filtered ## Remove Repeats samtools view -b -q 10 repeats_removed/sample_001.bam > filtered/sample_001.bam"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"step-1-prepare-","dir":"Articles","previous_headings":"Data processing with nQuack","what":"Step 1: Prepare.","title":"Data Preparation","text":"running model, data must converted tab-seperated text file. , wrote custom function prepare_data() convert BAM file tab-seperated text file samtools. 
function, must supply filename (without .bam ending), path directory containing BAM files, path directory want processed files saved.","code":"## Prepare many samples inpath <- \"filtered/\" outpath <- \"Processed/\" filelist <- list.files(path = inpath, pattern = \"*.bam\" ) filelist <- gsub(\".bam\", \"\", filelist) for( i in 1:length(filelist)){ prepare_data(filelist[i], inpath, outpath) }"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"warning-samtools-must-be-local","dir":"Articles","previous_headings":"Data processing with nQuack > Step 1: Prepare.","what":"Warning: samtools must be local!","title":"Data Preparation","text":"working personal computer, just make sure samtools installed callable “samtools”. working cluster, may need install samtools locally. Though location install may differ, installed samtools locally UF’s amazing HiPerGator slurm cluster:","code":"mkdir bin cd bin ln -s /apps/samtools/1.15/bin/samtools samtools"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"step-2-process","dir":"Articles","previous_headings":"Data processing with nQuack","what":"Step 2: Process","title":"Data Preparation","text":"bam converted tab-seperated text file, must process read R environment. function, provide three options filtering data: Total coverage can filtered based minimum sequencing depth maximum sequencing depth quantile probability (coverage falls max.depth.quantile.prob removed). Allele coverage can filtered based sequencing error rate, coverage allele must total coverage times error rate: \\(Coverage() > Coverage(+ B)*(error)\\), less total coverage times one minus error rate: \\(Coverage() < Coverage(+ B)*(1-error)\\) . Finally, sites may filtered based calculated allele frequency, removing sites lower bound, \\(C_{L}\\) , upper bound , \\(C_{U}\\) . Allele frequency can filtered based minimum maximum allele frequency. Finally, avoid data duplication, randomly sample allele equal probability site. 
resulting data set includes total coverage per site coverage associated randomly sampled allele.","code":"(1) Total coverage filter (2) Allele coverage filter (3) Allele frequency filter ## Prepare many samples textfiles <- list.files(path = \"Processed/\", pattern = \"*.txt\", full.name = FALSE) for(i in 1:length(textfiles)){ temp <- process_data(paste0(\"Processed/\", textfiles[i]), # File with full location min.depth = 2, # Total coverage gilter max.depth.quantile.prob = 0.9, # Total coverage filter error = 0.01, # Allele Coverage Filter trunc = c(0,0)) # Allele Frequency Filter assign((gsub(\".txt\", \"\", textfiles[i])), temp) }"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"coverage-filters-","dir":"Articles","previous_headings":"Data processing with nQuack > Step 2: Process > When should you filter your data?","what":"Coverage filters.","title":"Data Preparation","text":"Increasing decreasing minimum maximum coverage filter parameters might necessary data set. Total coverage can inspected based output process_data(), call output xm. x-axis coverage histogram goes way targeted sequencing depth, need decrease maximum depth quantile probability.","code":"# Plot hist(xm[,1]) ## Error cutoffs ### If I increase the sequence error rate, how many sites will likely be removed? new.e <- 0.02 # 2 sites out of every 100 removes <- c() for(i in 1:nrow(xm)){ if(xm[i,2] < (xm[i,1]*new.e) | xm[i,2] > (xm[i,1]*(1-new.e))){ removes[i] <- 1 }else{ removes[i] <- 0 } } sum(removes)"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"allele-frequency","dir":"Articles","previous_headings":"Data processing with nQuack > Step 2: Process > When should you filter your data?","what":"Allele Frequency","title":"Data Preparation","text":"Previous methods automatically truncated data allele frequency, removing site frequencies 0.1 0.9. first pass, suggest truncate data. However, data processed, plot data inspect. 
Phytophthora infestans (ENA:ERR1990235, Triploid), processed data truncation: Notice U-shaped ends? Well removed prior ploidal estimation. simply set trunc = c(0.15,0.85) reinspected data: One last note - expect 6x samples, careful truncation, mean one mixtures 0.16 hexaploid.","code":"# Convert to allele frequency xi <- xm[,2]/xm[,1] # Plot hist(xi)"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"comparing-nquack-to-nquire","dir":"Articles","previous_headings":"Data processing with nQuack","what":"Comparing nQuack to nQuire","title":"Data Preparation","text":"prepare data nQuack, use samtools mpileup, similarly nQuire, removes reads align PCR duplicates (samtool flags: BAM_FUNMAP BAM_FDUP). default, nQuire removes sites coverage less 10, well sites frequency less 0.2 0.8. allow users modify parameters text file creater rather default arbitrary cutoffs. mimic nQuire’s defaults triploid mentioned (Phytophthora infestans, ENA:ERR1990235). nQuack, can replicate nQuire data frame desired. happen like nQuire’s data preparation , uses data program. processing samples nQuire’s create view functions, resulting txt file can read R. prepare data frame nQuack, reduce three column data frame two columns randomly sampling allele B every site. created function help .","code":"# Read in nQuire txt file df <- process_nquire(\"file.txt\")"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"what-about-denoising","dir":"Articles","previous_headings":"Data processing with nQuack","what":"What about ‘denoising’?","title":"Data Preparation","text":"Noise can get way model selection, identifying data real noise difficult. filter allele frequencies normal + uniform mixture model. nQuack uses scaled probability data point belonging mixture model, inferred expected maximization algorithm. remove allele frequencies probability belonging uniform mixture higher probability belonging mixture. 
also implement nQuire’s denoise method , utilizes inferred alpha parameter histogram base frequencies filter data. method without faults. Notably, method poor job truncating allele frequencies needs done prior attempting method. example, triploid mentioned (Phytophthora infestans, ENA:ERR1990235), truncated c(0.1, 0.9) peak toward right still retained. However, truncate inital data set c(0.15, 0.85), much cleaner data set returned denoise_data() function:","code":""},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"alternative-approach---bclean","dir":"Articles","previous_headings":"Data processing with nQuack > What about ‘denoising’?","what":"Alternative approach - Bclean","title":"Data Preparation","text":"shape scale parameters associated beta distribution less 1, distribution U-shaped. can leverage U-shaped distribution remove noise. utilize beta distribution three mixtures representing cytotypes included nQuack two mixtures representing U-shaped distribution. constrained first three mixtures shape scale parameters 1, last two mixtures shape scale constrained less 1. implementation expected maximization, utilizes scaled probability data point belonging mixture model remove sites probability belonging U-shaped mixture higher probability belonging mixture. Due computational time needed run expected maximization algorithm, default, simple calculate probability matrix E-step run complete algorithm. great alternative allele truncation. 
example Bclean() function applied nQuire’s samples including Phytophthora infestans diploid (99189) triploid (88069), Saccharomyces cerevisiae diploid (SRR3265396), triploid (SRR3265389), tetraploid (SRR3265401): method can also applied truncation catch extra peak:","code":""},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"bclean-and-denoise","dir":"Articles","previous_headings":"Data processing with nQuack > What about ‘denoising’?","what":"Bclean and Denoise","title":"Data Preparation","text":"can also add two functions together, however may reduce data set dramatically impact model inference thoroughly explored. Now ready run nQuack!","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"site-based-heterozygosity","dir":"Articles","previous_headings":"","what":"Site-based heterozygosity","title":"Model Options","text":"biallelic site diploid allele’s B, expect sequence alleles equal proportions 50% sequences representing 50% sequences representing B. simple idea foundation site-based heterozygosity methods. approaches based biallelic single nucleotide polymorphisms within individual expected number copies allele.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"nquack","dir":"Articles","previous_headings":"","what":"nQuack","title":"Model Options","text":"Like previous site-based heterozygosity approaches predict ploidal level, use mixture model approach estimate likely ploidal level. nQuack provides multiple implementations expectation maximization (EM) algorithm, including implementations : normal distribution mimics nQuire, corrected normal distribution, beta distribution, beta-binomial distribution. implementations provided without uniform mixtures, total eight implementations. 
details available implementations use .","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"model-types","dir":"Articles","previous_headings":"nQuack","what":"Model types","title":"Model Options","text":"nQuire’s implementation expectation maximization normal distribution two types models: “free” variable estimated “fixed” variance estimated. nQuire calculate log-likelihood ratio free fixed models identify sample diploid, triploid, tetraploid. simplest approach expand higher ploidal levels just add distribution fixed mixtures examined. However, mixture models additional components might helpful predicting ploidal level. figure shows basic components mixture model include mean (\\(\\mu\\)), variance (\\(\\sigma\\)), proportion (alpha, \\(\\alpha\\)). components, expectations mean, expected frequencies allele, seen table . know proportions may differ allopolyploid compared autopolyploid, interested exploring models alpha free. ended coding implementation provide estimates parameters free (type = 'free'), alpha free (type = 'fixed'), alpha variance free (type = 'fixed_2'), variance free (type ='fixed_3). nQuire’s fixed models predict variance resemble type = 'fixed_3' implementation.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"additional-arguments-","dir":"Articles","previous_headings":"nQuack","what":"Additional arguments.","title":"Model Options","text":"expectation maximization function, starting parameter list (parm.list) must provided includes avec, mvec, svec, represent \\(\\alpha\\), \\(\\mu\\), \\(\\sigma\\) mixture. length input vector equal, unless distribution includes uniform mixture, avec include value uniform distribution. values must greater 0 less equal 1, sum(avec) equal 1. normal beta distributions, allele frequency provided (xi), however beta-binomial mixtures expect total coverage coverage randomly sampled allele (xm). 
Convergence algorithm occurs set niters iterates, difference augmented log-likelihood values less epsilon. also allow truncation probability density function match allele frequency truncation may done data cleaning (trunc). notes: equivalent nQuire model emstepNU() function. Implementations Nelder-Mead numerical optimization unable estimate single alpha value, therefore diploid mixtures estimated emstepB(type = \"fixed\")emstepBB(type = \"fixed\") log-likelihood calculated based sum probability density functions mixtures. beta beta-binomial use Nelder-Mead numerical optimization, computationally ‘slow’. provide packaged functions apply methods . Expectation maximization implementations can found : Normal: emstepNA() Normal uniform: emstepNUA() nQuire’s normal: emstepN() nQuire’s normal uniform: emstepNU() Beta: emstepB() Beta uniform: emstepBU() Beta-binomial: emstepBB() Beta-binomial uniform: emstepBBU()","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"mixture-model-implementation-","dir":"Articles","previous_headings":"","what":"Mixture model implementation.","title":"Model Options","text":"expect anyone program starting parameters needed run mixture model, designed functions help.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"implementation","dir":"Articles","previous_headings":"Mixture model implementation.","what":"Implementation","title":"Model Options","text":"quackNormal() quackNormalNQ() quackBeta() quackBetaBinom() functions can run 32 mixture models . matrix total coverage coverage randomly sampled allele (xm) supplied functions. functions can run parallel resources avaliable. input parameters mixture based table variance equal 0.01 alpha divided equally among expected frequencies. example, triploid: mvec = c(0.33, 0.67), avec=c(0.50, 0.50), svec=c(0.01, 0.01). uniform mixture included, allocate 0.1 avec mixture, therefore, avec = c(0.45, 0.45, 0.1). 
function, input mean variance can transformed based sequencing error rate sequencing overdispersion parameter (see supplemental methods information). Variance can also set 0.001. bestquack() Sometimes may want run 32 mixture models, created function lets pick distribution, type, mixtures included.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"interpretation","dir":"Articles","previous_headings":"Mixture model implementation.","what":"Interpretation","title":"Model Options","text":"Based functions, users able obtain BIC scores, log-likelihood, log-likelihood ratios (\\(\\Delta\\log{L}\\)) mixture. BIC \\(\\Delta\\log{L}\\), lowest value likely model. Based log-likelihood, highest value likely model. recommend using BIC penalized sample size number parameters estimated. BIC lead less errors model selection (see Dennis et al. 2019). select best model based model outputs, provide function quackit().","code":""},{"path":"http://mlgaynor.com/nQuack/articles/Outliers.html","id":"simulate-data","dir":"Articles","previous_headings":"","what":"Simulate Data","title":"Outliers","text":"Following Figure S2 WeiB et al. 2018 (nQuire), simulate 10 sites binomial distribution probability 0.5 coverage equal 100. 
data set outlier, simulate single site coverage equal 400.","code":"dipNorm <- data.frame(matrix(ncol = 3, nrow =10)) for(i in 1:10){ dipNorm[i,1] <- 100 dipNorm[i,2] <- rbinom(n = 1, size = 100, prob = 0.5) dipNorm[i,3] <- dipNorm[i,1] - dipNorm[i,2] } dipBias <- data.frame(matrix(ncol = 3, nrow =10)) coverage <- c(rep(100, 9), 400) prob <- c(rep(0.5, 9), 0.5) for(i in 1:10){ dipBias[i,1] <- coverage[i] dipBias[i,2] <- rbinom(n = 1, size = coverage[i], prob = prob[i]) dipBias[i,3] <- dipBias[i,1] - dipBias[i,2] }"},{"path":"http://mlgaynor.com/nQuack/articles/Outliers.html","id":"comparing-the-log-likelihood","dir":"Articles","previous_headings":"","what":"Comparing the Log-Likelihood","title":"Outliers","text":"calculate simple log-likelihood given expected parameter values diploid triploid model. cases, found diploid likely model simulated data. Unlike simulations nQuire, find dramatic effect log-likelihood calculations outliers. attempted partition data match Figure S2 WeiB et al. 2018, however, never create binomial distribution likely model triploid. However, allele frequency high coverage outlier deviates expected (case, set probability 0.2), see log-likelihood binomial normal distributions greatly effected outlier. However, find diploid model likely cases. Noteably, see small shifts log-likelihood associated beta beta-binomial distributions.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/Outliers.html","id":"tldr","dir":"Articles","previous_headings":"","what":"TLDR","title":"Outliers","text":"beta beta-binomial distributions look promising. 
find dramatic effect log-likelihood calculations outliers previously reported.beta beta-binomial distributions","code":""},{"path":"http://mlgaynor.com/nQuack/articles/SimulateData.html","id":"simple-or-idealisitic","dir":"Articles","previous_headings":"","what":"Simple or Idealisitic","title":"Simulate Data","text":"simple approach simulating data can done sim.ind.simple() coverage equal among sites counts allele sampled binomial distribution. example, want simulate 5000 sites 100x coverage diploid triploid, following: creates beautiful plots! However, reality look like data collect.","code":"dip <- sim.ind.simple(mvec = 0.5, cover = 100, s.size = 5000, sampled = TRUE) tri <- sim.ind.simple(mvec = c(0.33, 0.67), cover = 100, s.size = 5000, sampled = TRUE) par(mfrow=c(1,2)) hist(dip[,2]/dip[,1], main = \"Diploid\", xlab = \"Allele Frequency\", xlim = c(0,1)) hist(tri[,2]/tri[,1], main = \"Triploid\", xlab = \"Allele Frequency\", xlim = c(0,1))"},{"path":"http://mlgaynor.com/nQuack/articles/SimulateData.html","id":"advanced-or-realistic","dir":"Articles","previous_headings":"","what":"Advanced or Realistic","title":"Simulate Data","text":"simulate realistic data, two ‘advance’ options: sim.ind.BB() sim.ind.BB.tau(). functions, total coverage site sampled truncated poisson distribution (Pfenninger et al. 2022), rtrunc function truncdist. Given randomly selected proportion (.e. mean associated variance), copies allele defined binomial sample probability defined beta distribution (.e. beta-binomial) copies allele B equal remainder. Data filtered remove homozygous sites one allele sampled chance. filtered sites based total coverage sequencing coverage allele. function can also filter sites based truncated allele frequencies. Finally, randomly sample allele equal probability site. resulting data set includes total coverage per site coverage associated randomly sampled allele. 
want use one advance functions simulate similar scenario , might look something like : ploidal level samples definitely clear cut !","code":"dip.adv <- sim.ind.BB(mvec = 0.5, svec = 0.1, avec = 1.0, error = 0.001, s.size = 5000, max.coverage = 100, min.coverage = 50, lambda = 75) tri.adv <- sim.ind.BB(mvec = c(0.33, 0.67), avec = c(0.5, 0.5), svec = c(0.01, 0.01), error = 0.001, s.size = 5000, max.coverage = 100, min.coverage = 50, lambda = 75) par(mfrow=c(1,2)) hist(dip.adv[,2]/dip.adv[,1], main = \"Diploid - Advanced\", xlab = \"Allele Frequency\", xlim = c(0,1)) hist(tri.adv[,2]/tri.adv[,1], main = \"Triploid - Advanced\", xlab = \"Allele Frequency\", xlim = c(0,1))"},{"path":"http://mlgaynor.com/nQuack/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Michelle L. Gaynor. Author, maintainer. José Miguel Ponciano. Contributor.","code":""},{"path":"http://mlgaynor.com/nQuack/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Gaynor M, Landis J, O'Connor T, Laport R, Doyle J, Soltis D, Ponciano J, Soltis P (2024). “nQuack: R package predicting ploidal level sequence data using site-based heterozygosity.” bioRxiv. doi:10.1101/2024.02.12.579894.","code":"@Article{, title = {nQuack: An R package for predicting ploidal level from sequence data using site-based heterozygosity}, year = {2024}, journal = {bioRxiv}, publisher = {Cold Spring Harbor Laboratory}, doi = {10.1101/2024.02.12.579894}, author = {Michelle L. Gaynor and Jacob B. Landis and Timothy K. O'Connor and Robert G. Laport and Jeff J. Doyle and Douglas E. Soltis and José Miguel Ponciano and Pamela S. 
Soltis}, }"},{"path":"http://mlgaynor.com/nQuack/index.html","id":"nquack","dir":"","previous_headings":"","what":"Predicting ploidal level from sequence data using site-based heterozygosity ","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"Michelle L. Gaynor, Jacob B. Landis, Tim K. O’Connor, Robert G. Laport, Jeff J. Doyle, Douglas E. Soltis, José Miguel Ponciano, Pamela S. Soltis","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"overview","dir":"","previous_headings":"","what":"Overview","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"nQuack modified statistical framework predict ploidy level based sequence data. build upon Weib et al., 2018 Gaussian Mixture Model approach estimate ploidy level, originally written C executable.","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"more-on-nquack","dir":"","previous_headings":"","what":"More on nQuack","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"provided expanded tools implementations improve site-based heterozygosity inferences ploidal level. nQuack provides data preparation guidance tools decrease noise input data. include maximum sequence coverage quantile filter sequence error-based filter, remove biallelic sites likely representative copy number variance nuclear genome. also consider frequency allele B site, instead , found methods. learn best practices, see Data Preparation guide. model improves upon nQuire framework extending higher ploidal levels (pentaploid hexaploid), correcting augmented likelihood calculation, implementing suitable distribution, allowing additional ‘fixed’ models. also decrease model selection errors relying BIC rather likelihood ratio tests. learn methods, see Model Options guide. 
provide 32 ways estimates likelihood mixture models expectation maximization algorithm (see ) - 8 expectation maximization implementations 4 model types . total, nQuack offers 128 models.","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"evaluation-of-nquack","dir":"","previous_headings":"","what":"Evaluation of nQuack","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"examine utility method, examined 513,792 models based simulated real samples. using method, suggest read manuscript consider many limitations pattern-based approach determining ploidal level.","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"","code":"install.packages(\"devtools\") devtools::install_github(\"mgaynor1/nQuack\")"},{"path":"http://mlgaynor.com/nQuack/index.html","id":"warning-samtools-must-be-local","dir":"","previous_headings":"Installation","what":"Warning: samtools must be local!","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"working personal computer, make sure samtools installed callable “samtools” via terminal. working cluster, may need symbolically-link samtools locally. Though location install may differ, make samtools callable locally UF’s amazing HiPerGator slurm cluster: implementation, see Basic Example article.","code":"mkdir bin cd bin ln -s /apps/samtools/1.15/bin/samtools samtools"},{"path":"http://mlgaynor.com/nQuack/index.html","id":"references","dir":"","previous_headings":"","what":"References","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"Gaynor ML, Landis JB, O’Connor TK, Laport RG, Doyle JJ, Soltis DE, Ponciano JM, Soltis PS. nQuack: R package predicting ploidy level sequence data using site-based heterozygosity. Review. 
See preprint bioRxiv, doi.org: 10.1101/2024.02.12.579894","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"up-next","dir":"","previous_headings":"","what":"Up Next:","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"sequence data known ploidal level mixed-ploidy system, let us know. love collaborate .","code":""},{"path":"http://mlgaynor.com/nQuack/reference/Bclean.html","id":null,"dir":"Reference","previous_headings":"","what":"Remove noise with the beta distribution — Bclean","title":"Remove noise with the beta distribution — Bclean","text":"filter allele frequencies beta mixture model contains 5 mixtures: three mixtures representing cytotypes included nQuack two mixtures representing U-shaped distribution. constrained first three mixtures shape scale parameters 1, last two mixtures shape scale constrained less 1. implementation expectation-maximization, utilizes scaled probability data point belonging mixture model remove site probability belonging U-shaped mixture higher probability belonging mixture. Due computational time needed run expectation-maximization algorithm, default, simple calculate probability matrix E-step run complete algorithm.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/Bclean.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Remove noise with the beta distribution — Bclean","text":"","code":"Bclean(xm, plot = TRUE, quick = TRUE)"},{"path":"http://mlgaynor.com/nQuack/reference/Bclean.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Remove noise with the beta distribution — Bclean","text":"xm Matrix total coverage coverage randomly sampled allele. plot Default TRUE. plots share y-axis, careful interpretation key. Warning, nothing removed, plot removed data missing. quick Default TRUE. 
set FALSE, expectation-maximization algorithm run full.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/Bclean.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Remove noise with the beta distribution — Bclean","text":"Numeric matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalc.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","title":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","text":"Calculate Alpha Beta Mean Variance","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","text":"","code":"alphabetacalc(mu, var)"},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","text":"mu Mean. var Variance.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","text":"Numeric vector alpha beta.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctau.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctau","title":"Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctau","text":"Calculate Alpha Beta Mean, Tau, Error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate Alpha and Beta from Mean, Tau, and Error rate. 
— alphabetacalctau","text":"","code":"alphabetacalctau(mu, tau, error)"},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctau","text":"mu Mean. tau Overdispersion parameter. Ranges 0 1, 0 indicates less overdispersion 1 indicates high overdispersion. tau must greater 0. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctau","text":"Numeric vector alpha beta.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctauvec.html","id":null,"dir":"Reference","previous_headings":"","what":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","title":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","text":"Vector-based - Calculate Alpha Beta Mean, Tau, Error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctauvec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","text":"","code":"alphabetacalctauvec(mu, tau, error)"},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctauvec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","text":"mu Vector mean. tau Overdispersion parameter. Ranges 0 1, 0 indicates less overdispersion 1 indicates high overdispersion. tau must greater 0. error Sequencing error rate. 
Ranges 0 1.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctauvec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","text":"Numeric matrix alpha beta.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalcvec.html","id":null,"dir":"Reference","previous_headings":"","what":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","title":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","text":"Vector-based - Calculate Alpha Beta Mean Variance","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalcvec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","text":"","code":"alphabetacalcvec(mu, var)"},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalcvec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","text":"mu Vector mean. 
var Vector variance.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalcvec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","text":"Numeric matrix alpha beta.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bdBICcalc.html","id":null,"dir":"Reference","previous_headings":"","what":"BIC Calculations - Bad data — bdBICcalc","title":"BIC Calculations - Bad data — bdBICcalc","text":"function used model selection calculate BIC uniform distribution, represent data discernible pattern.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bdBICcalc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"BIC Calculations - Bad data — bdBICcalc","text":"","code":"bdBICcalc(xi)"},{"path":"http://mlgaynor.com/nQuack/reference/bdBICcalc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"BIC Calculations - Bad data — bdBICcalc","text":"xi Vector allele frequencies.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bdBICcalc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"BIC Calculations - Bad data — bdBICcalc","text":"BIC score bad data model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bestquack.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","title":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","text":"function made run subset models based selected distribution type. many limitations function make tractable, 128 models run package. 
include models comparisons found unhelpful, includes nQuire implementation log-likelihood ratio tests.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bestquack.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","text":"","code":"bestquack( xm, distribution, type, uniform, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\"), samplename, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA )"},{"path":"http://mlgaynor.com/nQuack/reference/bestquack.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","text":"xm Matrix two columns total coverage coverage randomly sampled allele. distribution May set normal, beta, beta-binomial. include implementation nQuire. type May equal fixed, fixed_2, fixed_3. uniform equal 1, uniform mixture included. equal 0, uniform mixture included. mixtures Defaults c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\"). samplename Name sample included output. trunc List two values representing lower upper bounds allele frequency truncation ,\\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. 
, variance default equal 0.01 0.001.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bestquack.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","text":"BIC scores log-likelihood (LL) included mixture models. BIC, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap Model Selection - Helper Function - Beta — bootB","title":"Bootstrap Model Selection - Helper Function - Beta — bootB","text":"Bootstrap Model Selection - Helper Function - Beta Bootstrap Model Selection - Helper Function - Beta-Binomial","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap Model Selection - Helper Function - Beta — bootB","text":"","code":"bootB(pset, xi, trunc, boots, niter = 1000L, epsilon = 0.1) bootBB(pset, xm, trunc, boots, niter = 1000L, epsilon = 0.1)"},{"path":"http://mlgaynor.com/nQuack/reference/bootB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap Model Selection - Helper Function - Beta — bootB","text":"pset list 5 sets model parameters, list alpha, mean, variance. xi List observations, case allele frequencies. trunc List two values representing lower upper bounds, $c_L$ $c_U$. boots Number bootstraps conduct. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. 
xm Matrix first column total coverage second count base B.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap Model Selection - Helper Function - Beta — bootB","text":"Summary statistics including Log-likelihood BIC score, well model checks. model checks include abcheck, verifies fit parameters suggest U-shaped distribution. vector \"converge\" also returned, contains number iterates convergence. number iterates surpasses niters parameter, model converge. returned matrix, first row represents true data following rows represent permutations data. Summary statistics including Log-likelihood BIC score, well model checks. model checks include abcheck, verifies fit parameters suggest U-shaped distribution. vector \"converge\" also returned, contains number iterates convergence. number iterates surpasses niters parameter, model converge. returned matrix, first row represents true data following rows represent permutations data.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapB","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapB","text":"function used model selection Beta mixed model. include haploid BIC calculation function. 
Warning, can slow.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapB","text":"","code":"bootstrapB(xm, trunc, boots, return = \"complex\")"},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. boots Number bootstrap replications run. return much information return. Default \"complex\". numbers scare , pick \"simple\"!","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapBB","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapBB","text":"function used model selection Beta mixed model. include haploid BIC calculation function. Warning, can slow.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapBB","text":"","code":"bootstrapBB(xm, trunc, boots, return = \"complex\")"},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapBB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. 
trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. boots Number bootstrap replications run. return much information return. Default \"complex\". numbers scare , pick \"simple\"!","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquack.nQuire.html","id":null,"dir":"Reference","previous_headings":"","what":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","text":"function mimics nQuire calculates delta log-likelihood (likelihood ratio) based normal-uniform mixtures model types (free fixed).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquack.nQuire.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","text":"","code":"deltaquack.nQuire( xm, trunc = c(0, 0), type = \"alpha\", dup = FALSE, free = \"all\", return = \"both\" )"},{"path":"http://mlgaynor.com/nQuack/reference/deltaquack.nQuire.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. type Sets free parameter fixed models. nQuire, variance free fixed models. allow free parameter fixed model : 'variance', 'alpha' (proportions mixture), '' (variance & alpha). Options: 'variance', 'alpha', ''. dup setting dup = TRUE, data used mixture model include allele frequency allele allele B. form data dredging: single site, frequency frequency B add one (+ B = 1), therefore represent data point. Default = FALSE. 
free function originally built make sure understood nQuire calculations, therefore provide free model nQuire mixtures representing diploids, triploids, tetraplpoids. default, set free model represent \"\" ploidal levels tested nQuack. Options: \"nQuire\" \"\". return Indicates values return. Options: 'deltaLL' (delta log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquack.nQuire.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","text":"Delta log-likelihood BIC scores fixed models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackB.html","id":null,"dir":"Reference","previous_headings":"","what":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","title":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","text":"function can used estimate likely ploidal level using beta distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","text":"","code":"deltaquackB( xm, trunc = c(0, 0), cores = NA, type = \"alpha\", free = \"BUM\", return = \"both\" )"},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. 
type Sets free parameter fixed models. nQuire, variance free fixed models. allow free parameter fixed model : 'variance', 'alpha' (proportions mixture), '' (variance & alpha). Options: 'variance', 'alpha', ''. free Indicates free model utilize. Defaults 'BUM', beta-uniform model. Options: 'BUM' (Beta-uniform mixture), 'BM' (Beta mixture). return Indicates values return. Options: 'deltaLL' (delta log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","text":"Delta log-likelihood BIC scores fixed models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","title":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","text":"function can used estimate likely ploidal level using bet","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","text":"","code":"deltaquackBB( xm, trunc = c(0, 0), cores = NA, type = \"alpha\", free = \"BUM\", return = \"both\", error = NULL, tau = NULL )"},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. 
trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. type Sets free parameter fixed models. nQuire, variance free fixed models. allow free parameter fixed model : 'variance', 'alpha' (proportions mixture), '' (variance & alpha). Options: 'variance', 'alpha', ''. free Indicates free model utilize. Defaults 'BUM', beta-uniform model. Options: 'BUM' (Beta-uniform mixture), 'BM' (Beta mixture). return Indicates values return. Options: 'deltaLL' (delta log-likelihood) 'BIC' (bayesian information criterion) ''. error Sequencing error rate. tau Sequence overdispersion parameter read counts.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackBB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","text":"Delta log-likelihood BIC scores fixed models including diploid, triploid, tetraploid, pentaploid, hexaploid. 
, smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackN.html","id":null,"dir":"Reference","previous_headings":"","what":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","text":"function mimics nQuire calculates delta log-likelihood (likelihood ratio) based normal-uniform mixtures.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackN.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","text":"","code":"deltaquackN(xm, trunc = c(0, 0), type = \"alpha\", dup = FALSE, return = \"both\")"},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackN.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. type Sets free parameter fixed models. nQuire, variance free fixed models. allow free parameter fixed model : 'variance', 'alpha' (proportions mixture), '' (variance & alpha). Options: 'variance', 'alpha', ''. dup setting dup = TRUE, data used mixture model include allele frequency allele allele B. form data dredging: single site, frequency frequency B add one (+ B = 1), therefore represent data point. Default = FALSE. return Indicates values return. 
Options: 'deltaLL' (delta log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackN.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","text":"Delta log-likelihood BIC scores fixed models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/denoise_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Denoise Data — denoise_data","title":"Denoise Data — denoise_data","text":"filter allele frequencies normal + uniform mixture model. nQuack utilizes scaled probability data point belonging mixture model, inferred expectation maximization algorithm. remove allele frequencies probability belonging uniform mixture higher probability belonging mixture. also implement nQuire's denoise method , utilizes inferred alpha parameter histogram base frequencies filter data.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/denoise_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Denoise Data — denoise_data","text":"","code":"denoise_data(xm, plot = TRUE, filter = \"both\")"},{"path":"http://mlgaynor.com/nQuack/reference/denoise_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Denoise Data — denoise_data","text":"xm Matrix total coverage coverage randomly sampled allele. plot Default TRUE. plots share y-axis, careful interpretation key. Warning, nothing removed, plot removed data missing. filter Indicates method remove data based upon. Options: '', 'nquire', 'nquack'. nQuack utilizes scaled probability data point belonging mixture model, removing sites probability belonging uniform mixture higher probability belonging mixture. 
nQuire utilizes inferred alpha parameter histogram base frequencies filter data.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/denoise_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Denoise Data — denoise_data","text":"Numeric matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta Distribution — emstepB","title":"Expectation maximization - Beta Distribution — emstepB","text":"function calculates log-likelihood using expectation maximization algorithm Nelder-Mead numerical optimization beta distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta Distribution — emstepB","text":"","code":"emstepB(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta Distribution — emstepB","text":"parmlist list containing initial alpha, mean, variance values. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed-2\" (estimated parameter(s): alpha variance), \"fixed-3\" (estimated parameter(s): variance). 
avec length 1, fixed fixed-3 able return log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta Distribution — emstepB","text":"List elements including log likelihood, negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB3.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","title":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","text":"function made Bclean() function preforms expectation maximization Nelder-Mead numerical optimization beta distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB3.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","text":"","code":"emstepB3(parmlist, xi, niter, epsilon, trunc)"},{"path":"http://mlgaynor.com/nQuack/reference/emstepB3.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","text":"parmlist list containing initial alpha, mean, variance. xi Matrix first column total coverage second count base B. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. 
trunc List two values representing lower upper bounds, $c_L$ $c_U$.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB3.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","text":"List elements including negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta-Binomial Distribution — emstepBB","title":"Expectation maximization - Beta-Binomial Distribution — emstepBB","text":"function calculates negative log-likelihood using expectation maximization algorithm Nelder-Mead numerical optimization beta-binomial distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta-Binomial Distribution — emstepBB","text":"","code":"emstepBB(parmlist, xm, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta-Binomial Distribution — emstepBB","text":"parmlist list containing initial alpha, mean, variance. xm Matrix first column total coverage second count base B. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. 
type String indicating \"Free\" \"Fixed\".","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta-Binomial Distribution — emstepBB","text":"List elements including negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBBU.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","title":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","text":"function calculates log-likelihood using expectation-maximization algorithm Nelder-Mead numerical optimization beta distribution one uniform mixture.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBBU.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","text":"","code":"emstepBBU(parmlist, xm, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepBBU.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","text":"parmlist list containing initial alpha, mean, variance values. xm Matrix first column total coverage second count base B. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed-2\" (estimated parameter(s): alpha variance), \"fixed-3\" (estimated parameter(s): variance). 
avec length 1, fixed fixed-3 able return log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBBU.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","text":"List elements including log likelihood, negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBU.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta and Uniform Distributions — emstepBU","title":"Expectation maximization - Beta and Uniform Distributions — emstepBU","text":"function calculates log-likelihood using expectation maximization algorithm Nelder-Mead numerical optimization beta distribution one uniform mixture.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBU.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta and Uniform Distributions — emstepBU","text":"","code":"emstepBU(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepBU.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta and Uniform Distributions — emstepBU","text":"parmlist list containing initial alpha, mean, variance values. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance). 
avec length 1, fixed fixed_3 able return log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBU.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta and Uniform Distributions — emstepBU","text":"List elements including log likelihood, negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepN.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Normal Distribution — emstepN","title":"Expectation maximization - Normal Distribution — emstepN","text":"function calculates log-likelihood using expectation maximization algorithm Normal Distribution. code follows nQuire use augmented likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepN.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Normal Distribution — emstepN","text":"","code":"emstepN(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepN.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Normal Distribution — emstepN","text":"parmlist list containing initial alpha, mean, variance values. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. 
Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepN.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Normal Distribution — emstepN","text":"List elements including log-likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNA.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Normal Distribution — emstepNA","title":"Expectation maximization - Normal Distribution — emstepNA","text":"function calculates log-likelihood using expectation maximization algorithm Normal Distribution. code identical nQuire uses augmented likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNA.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Normal Distribution — emstepNA","text":"","code":"emstepNA(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepNA.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Normal Distribution — emstepNA","text":"parmlist list containing initial alpha, mean, variance values. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. 
Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNA.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Normal Distribution — emstepNA","text":"List elements including log-likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNU.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Normal and Uniform Distribution — emstepNU","title":"Expectation maximization - Normal and Uniform Distribution — emstepNU","text":"function calculates log-likelihood using expectation maximization algorithm Normal-Uniform Distribution. code follows nQuire use augmented likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNU.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Normal and Uniform Distribution — emstepNU","text":"","code":"emstepNU(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepNU.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Normal and Uniform Distribution — emstepNU","text":"parmlist list containing initial alpha, mean, variance values. list alpha must include proportion uniform mixture. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. 
Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNU.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Normal and Uniform Distribution — emstepNU","text":"List elements including log-likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNUA.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Normal Distribution — emstepNUA","title":"Expectation maximization - Normal Distribution — emstepNUA","text":"function calculates log-likelihood using expectation maximization algorithm Normal-Uniform Distribution. code identical nQuire uses augmented likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNUA.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Normal Distribution — emstepNUA","text":"","code":"emstepNUA(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepNUA.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Normal Distribution — emstepNUA","text":"parmlist list containing initial alpha, mean, variance values. list alpha must include proportion uniform mixture. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. 
Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNUA.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Normal Distribution — emstepNUA","text":"List elements including log-likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/estepB3.html","id":null,"dir":"Reference","previous_headings":"","what":"E-Step for Expectation Maximization - Beta + Beta + Beta Distribution — estepB3","title":"E-Step for Expectation Maximization - Beta + Beta + Beta Distribution — estepB3","text":"used Bclean() function. complete E-Step calculate log-likelihood. Modifications include correction truncated distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/estepB3.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"E-Step for Expectation Maximization - Beta + Beta + Beta Distribution — estepB3","text":"","code":"estepB3(parmlist, xi, trunc)"},{"path":"http://mlgaynor.com/nQuack/reference/estepB3.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"E-Step for Expectation Maximization - Beta + Beta + Beta Distribution — estepB3","text":"parmlist list containing initial alpha, mean, variance. xi List observations, case allele frequencies. 
trunc List two values representing lower upper bounds, $c_L$ $c_U$.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidBICcalc.html","id":null,"dir":"Reference","previous_headings":"","what":"BIC Calculations - Haploid — haploidBICcalc","title":"BIC Calculations - Haploid — haploidBICcalc","text":"function used model selection calculate BIC haploid model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidBICcalc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"BIC Calculations - Haploid — haploidBICcalc","text":"","code":"haploidBICcalc(xi, trunc)"},{"path":"http://mlgaynor.com/nQuack/reference/haploidBICcalc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"BIC Calculations - Haploid — haploidBICcalc","text":"xi Vector allele frequencies. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. @returns BIC score Haploid model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidoptim.html","id":null,"dir":"Reference","previous_headings":"","what":"Haploid log-likelihood calculation — haploidoptim","title":"Haploid log-likelihood calculation — haploidoptim","text":"Haploid log-likelihood calculation","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidoptim.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Haploid log-likelihood calculation — haploidoptim","text":"","code":"haploidoptim(xi)"},{"path":"http://mlgaynor.com/nQuack/reference/haploidoptim.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Haploid log-likelihood calculation — haploidoptim","text":"xi List allele frequencies.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidoptim.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Haploid log-likelihood calculation — 
haploidoptim","text":"Log-likelihood given shape scale (alpha beta) less one.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","title":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","text":"Bootstrap Model Selection - Helper Function - Beta","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","text":"","code":"mmselectB(pset, xi, trunc, niter = 1000L, epsilon = 0.1)"},{"path":"http://mlgaynor.com/nQuack/reference/mmselectB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","text":"pset list 5 sets model parameters, list alpha, mean, variance. xi List observations, case allele frequencies. trunc List two values representing lower upper bounds, $c_L$ $c_U$. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","text":"Summary statistics including Log-likelihood BIC score, well model checks. model checks include abcheck, verifies fit parameters suggest U-shaped distribution. vector \"converge\" also returned, contains number iterates convergence. 
number iterates surpasses niters parameter, model converge.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","title":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","text":"Bootstrap Model Selection - Helper Function - Beta-Binomial","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","text":"","code":"mmselectBB(pset, xm, trunc, niter = 1000L, epsilon = 0.1)"},{"path":"http://mlgaynor.com/nQuack/reference/mmselectBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","text":"pset list 5 sets model parameters, list alpha, mean, variance. xm Matrix first column total coverage second count base B. trunc List two values representing lower upper bounds, $c_L$ $c_U$. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectBB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","text":"Summary statistics including Log-likelihood BIC score, well model checks. model checks include abcheck, verifies fit parameters suggest U-shaped distribution. vector \"converge\" also returned, contains number iterates convergence. 
number iterates surpasses niters parameter, model converge.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/muvarcalcvec.html","id":null,"dir":"Reference","previous_headings":"","what":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","title":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","text":"function used calculate variance.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/muvarcalcvec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","text":"","code":"muvarcalcvec(mu, tau, error)"},{"path":"http://mlgaynor.com/nQuack/reference/muvarcalcvec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","text":"mu Vector means. tau Sequence overdispersion parameter read counts. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/muvarcalcvec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","text":"Mean variance associated tau error.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/nQuire_reformat.html","id":null,"dir":"Reference","previous_headings":"","what":"Data Preparation - Use nQuire's Data — nQuire_reformat","title":"Data Preparation - Use nQuire's Data — nQuire_reformat","text":"function reduce three column data frame two columns randomly sampling allele B every site. 
used function process_nquire()","code":""},{"path":"http://mlgaynor.com/nQuack/reference/nQuire_reformat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Data Preparation - Use nQuire's Data — nQuire_reformat","text":"","code":"nQuire_reformat(xm)"},{"path":"http://mlgaynor.com/nQuack/reference/nQuire_reformat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Data Preparation - Use nQuire's Data — nQuire_reformat","text":"xm matrix three columns: Total Coverage, Counts Allele , Counts Allele B.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/nQuire_reformat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Data Preparation - Use nQuire's Data — nQuire_reformat","text":"Numeric Matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Prepare data - Step 1 — prepare_data","title":"Prepare data - Step 1 — prepare_data","text":"function transforms BAM file text file. Specifically, function uses samtools mpileup translate BAM tab-separated file. filter file remove indels deletions. running function, temporary folder created (named 'temp/'), however folder removed process complete.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prepare data - Step 1 — prepare_data","text":"","code":"prepare_data(name, inpath, outpath)"},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prepare data - Step 1 — prepare_data","text":"name File name without suffix. example, file called \"frog.bam\", input \"frog\". inpath Location input file. 
outpath Location output file.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prepare data - Step 1 — prepare_data","text":"Writes text file following columns: chromosome, position, depth, , C, G, T.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Prepare data - Step 1 — prepare_data","text":"Warning, due processing time needed samtools mpileup, step may take time. function also requires samtools located locally. Please see Data Preparation article information. Warning, writes temporary folder titled 'temp'. want run multiple samples , suggest set working directory separate locations ensure temp folder/files overwritten.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Process data - Step 2 — process_data","title":"Process data - Step 2 — process_data","text":"Based file generated prepare_data(), contains total depth sequencing coverage nucleotide (, C, G, T), function remove single nucelotide polymorphisms. supplied, function filter coverage allele frequency. filter total coverage, user must supply min.depth max.depth.quantile.prob. error provided, sites retained allele coverage greater sequencing error rate times total coverage, less one minus sequencing error rate times total coverage. Lastly, based trunc, allele frequencies filtered based provided lower upper bound. 
Finally, function samples single allele frequency per site avoid data duplication.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Process data - Step 2 — process_data","text":"","code":"process_data( file, min.depth = 2, max.depth.quantile.prob = 0.9, error = 0.01, trunc = c(0, 0) )"},{"path":"http://mlgaynor.com/nQuack/reference/process_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Process data - Step 2 — process_data","text":"file Output txt file created prepare_data(). min.depth Minimum sequencing depth, default 2. max.depth.quantile.prob Maximum sequencing depth quantile cut , default = 0.9. error Sequencing error rate. error provided, sites retained allele coverage greater sequencing error rate times total coverage, less one minus sequencing error rate times total coverage. trunc List two values representing lower upper bounds, \\(c_{L}\\) \\(c_{U}\\) used filter allele frequencies.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Process data - Step 2 — process_data","text":"Numeric matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_nquire.html","id":null,"dir":"Reference","previous_headings":"","what":"Use nQuire's Data — process_nquire","title":"Use nQuire's Data — process_nquire","text":"happen like nQuire's data preparation , uses data program. processing samples nQuire's create view functions, resulting text file can read R. 
prepare data frame nQuack, reduce three column data frame two columns randomly sampling allele B every site.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_nquire.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Use nQuire's Data — process_nquire","text":"","code":"process_nquire(file)"},{"path":"http://mlgaynor.com/nQuack/reference/process_nquire.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Use nQuire's Data — process_nquire","text":"file Output text file created nQuire.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_nquire.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Use nQuire's Data — process_nquire","text":"Numeric matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_rcpp.html","id":null,"dir":"Reference","previous_headings":"","what":"Data Preparation - Matrix Filtering — process_rcpp","title":"Data Preparation - Matrix Filtering — process_rcpp","text":"Based supplied matrix total depth sequencing coverage nucleotide (, C, G, T) function remove single nucelotide polymorphisms. supplied, function filter coverage allele frequency. Finally, function samples single allele frequency per site avoid data duplication.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_rcpp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Data Preparation - Matrix Filtering — process_rcpp","text":"","code":"process_rcpp(x, mindepth, maxprob, trunc, error)"},{"path":"http://mlgaynor.com/nQuack/reference/process_rcpp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Data Preparation - Matrix Filtering — process_rcpp","text":"x Matrix five columns: Depth, , C, G, T. mindepth Minimum depth, default = 15. maxprob Maximum depth quantile cut , default = 0.9. 
trunc List two values representing lower upper bounds, $c_L$ $c_U$. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_rcpp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Data Preparation - Matrix Filtering — process_rcpp","text":"Numeric Matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackB.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta Distribution — quackB","title":"Mixture Model - Expected Maximization - Beta Distribution — quackB","text":"function used expected maximization Beta Mixture Model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta Distribution — quackB","text":"","code":"quackB(xm, trunc = c(0, 0), cores = NA)"},{"path":"http://mlgaynor.com/nQuack/reference/quackB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta Distribution — quackB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. 
cores Number cores use parallel processing.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta-Binomial Distribution — quackBB","title":"Mixture Model - Expected Maximization - Beta-Binomial Distribution — quackBB","text":"function wrapper expected maximization Beta Mixture Model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta-Binomial Distribution — quackBB","text":"","code":"quackBB(xm, trunc = c(0, 0), cores = NA, tau = NULL, error = NULL)"},{"path":"http://mlgaynor.com/nQuack/reference/quackBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta-Binomial Distribution — quackBB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequence overdispersion parameter read counts. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","title":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","text":"function wrapper expected maximization Beta-Binomial Mixture Model. 
function runs free model mixtures.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","text":"","code":"quackBBM(xm, trunc = c(0, 0), cores = NA, tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackBBM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBUM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","title":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","text":"function wrapper expected maximization Beta-Binomial-Uniform Mixture Model. function runs free model mixtures. 
uniform mixture starting proportion 0.1.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBUM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","text":"","code":"quackBBUM(xm, trunc = c(0, 0), cores = NA, tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackBBUM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBUM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","title":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","text":"function used expected maximization Beta Mixture Model. 
function runs free model mixtures.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","text":"","code":"quackBM(xm, trunc = c(0, 0), cores = NA, tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackBM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. allele frequency truncation done remove error, need truncate expected, observed. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBUM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","title":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","text":"function used expected maximization Beta-Uniform Mixture Model. 
function runs free model mixtures.uniform mixture starting proportion 0.1.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBUM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","text":"","code":"quackBUM(xm, trunc = c(0, 0), cores = NA, tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackBUM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. allele frequency truncation done remove error, need truncate expected, observed. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBUM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. 
, smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBeta.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","title":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","text":"function uses expectation maximization beta beta-uniform mixture models model selection. can run 32 mixture models.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBeta.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","text":"","code":"quackBeta( xm, samplename, cores, parallel = FALSE, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA, free = FALSE )"},{"path":"http://mlgaynor.com/nQuack/reference/quackBeta.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","text":"xm Matrix two columns total coverage coverage randomly sampled allele. samplename Name sample included output. cores Threads available run process parallel. parallel default = FALSE, set true cores > 1. trunc List two values representing lower upper bounds allele frequency truncation , \\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. 
free default = FALSE, skip free model calculation calculate delta log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBeta.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","text":"BIC scores log-likelihood (LL) mixture models including diploid, triploid, tetraploid, pentaploid, hexaploid. free = TRUE, delta log-likelihood (dLL) calculated based associated free model (without uniform mixture). BIC delta-log likelihood, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBetaBinom.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","title":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","text":"function uses expectation maximization beta-binomial beta-binomial-uniform mixture models model selection. can run 32 mixture models.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBetaBinom.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","text":"","code":"quackBetaBinom( xm, samplename, cores, parallel = FALSE, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA, free = FALSE )"},{"path":"http://mlgaynor.com/nQuack/reference/quackBetaBinom.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","text":"xm Matrix two columns total coverage coverage randomly sampled allele. samplename Name sample included output. cores Threads available run process parallel. parallel default = FALSE, set true cores > 1. 
trunc List two values representing lower upper bounds allele frequency truncation , \\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. free default = FALSE, skip free model calculation calculate delta log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBetaBinom.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","text":"BIC scores log-likelihood (LL) mixture models including diploid, triploid, tetraploid, pentaploid, hexaploid. free = TRUE, delta log-likelihood (dLL) calculated based associated free model (without uniform mixture). BIC delta-log likelihood, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","title":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","text":"function used expected maximization Normal Mixture Model. 
function runs free model mixtures.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","text":"","code":"quackNM(xm, trunc = c(0, 0), tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackNM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. allele frequency truncation done remove error, need truncate expected, observed. truncation done, set c(0,0), default. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNUM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","title":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","text":"function used expected maximization Normal-Uniform Mixture Model. 
function runs free model mixtures.uniform mixture starting proportion 0.1.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNUM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","text":"","code":"quackNUM(xm, trunc = c(0, 0), tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackNUM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. allele frequency truncation done remove error, need truncate expected, observed. truncation done, set c(0,0), default. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNUM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. 
, smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNboots.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","title":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","text":"function made assist bootstrap replication set models run subset models based selected distribution type. many limitations function make tractable, 128 models run package. include models comparisons found unhelpful, includes nQuire implementation log-likelihood ratio tests.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNboots.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","text":"","code":"quackNboots( xm, nboots = 100, distribution, type, uniform, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\"), samplename, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA )"},{"path":"http://mlgaynor.com/nQuack/reference/quackNboots.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","text":"xm Matrix two columns total coverage coverage randomly sampled allele. nboots Number bootstrap replicates examine. distribution May set normal, beta, beta-binomial. include implementation nQuire. type May equal fixed, fixed_2, fixed_3. uniform equal 1, uniform mixture included. equal 0, uniform mixture included. mixtures Defaults c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\"). samplename Name sample included output. trunc List two values representing lower upper bounds allele frequency truncation ,\\(c_{L}\\) \\(c_{U}\\). 
allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNboots.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","text":"BIC scores log-likelihood (LL) included mixture models. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormal.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","title":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","text":"function uses expectation maximization normal normal-uniform mixture models model selection. can run 32 mixture models.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormal.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","text":"","code":"quackNormal( xm, samplename, cores, parallel = FALSE, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA, free = FALSE )"},{"path":"http://mlgaynor.com/nQuack/reference/quackNormal.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","text":"xm Matrix two columns total coverage coverage randomly sampled allele. samplename Name sample included output. cores Threads available run process parallel. 
parallel default = FALSE, set true cores > 1. trunc List two values representing lower upper bounds allele frequency truncation,\\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. free default = FALSE, skip free model calculation calculate delta log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormal.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","text":"BIC scores log-likelihood (LL) mixture models including diploid, triploid, tetraploid, pentaploid, hexaploid. free = TRUE, delta log-likelihood (dLL) calculated based associated free model (without uniform mixture). BIC delta-log likelihood, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormalNQ.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","title":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","text":"function uses expectation maximization normal normal-uniform mixture models model selection based nQuire approach. 
can run 32 mixture models.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormalNQ.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","text":"","code":"quackNormalNQ( xm, samplename, cores, parallel = FALSE, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA, free = FALSE )"},{"path":"http://mlgaynor.com/nQuack/reference/quackNormalNQ.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","text":"xm Matrix two columns total coverage coverage randomly sampled allele. samplename Name sample included output. cores Threads available run process parallel. parallel default = FALSE, set true cores > 1. trunc List two values representing lower upper bounds allele frequency truncation , \\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. free default = FALSE, skip free model calculation calculate delta log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormalNQ.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","text":"BIC scores log-likelihood (LL) mixture models including diploid, triploid, tetraploid, pentaploid, hexaploid. free = TRUE, delta log-likelihood (dLL) calculated based associated free model (without uniform mixture). 
BIC delta-log likelihood, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackit.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Based on BIC or Log-Likelihood — quackit","title":"Model Selection - Based on BIC or Log-Likelihood — quackit","text":"function model interpretation.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Based on BIC or Log-Likelihood — quackit","text":"","code":"quackit( model_out, summary_statistic = \"BIC\", mixtures = c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\") )"},{"path":"http://mlgaynor.com/nQuack/reference/quackit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Based on BIC or Log-Likelihood — quackit","text":"model_out Data frame containing, minimum, columns labeled LL, type, mixture, distribution, BIC. summary_statistic May equal BIC dLL. mixtures Defaults c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\").","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Based on BIC or Log-Likelihood — quackit","text":"Returns data frame likely model set mixtures. 
Includes best second best mixtures, well difference two.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/resample_xm.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate Alpha and Beta from Mean and Variance — resample_xm","title":"Calculate Alpha and Beta from Mean and Variance — resample_xm","text":"Calculate Alpha Beta Mean Variance","code":""},{"path":"http://mlgaynor.com/nQuack/reference/resample_xm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate Alpha and Beta from Mean and Variance — resample_xm","text":"","code":"resample_xm(xm, n)"},{"path":"http://mlgaynor.com/nQuack/reference/resample_xm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate Alpha and Beta from Mean and Variance — resample_xm","text":"xm Matrix total coverage coverage randomly sampled allele. n Length matrix.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/resample_xm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate Alpha and Beta from Mean and Variance — resample_xm","text":"Randomly sampled matrix.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/setconvert.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate Variance from Mean, Tau, and Sequencing Error — setconvert","title":"Calculate Variance from Mean, Tau, and Sequencing Error — setconvert","text":"function used replace variance mixture model sets.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/setconvert.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate Variance from Mean, Tau, and Sequencing Error — setconvert","text":"","code":"setconvert(set, tau, error)"},{"path":"http://mlgaynor.com/nQuack/reference/setconvert.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate Variance from Mean, Tau, and Sequencing Error 
— setconvert","text":"set list lists, lists must contain avec, mvec, svec. tau Sequence overdispersion parameter read counts. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/setconvert.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate Variance from Mean, Tau, and Sequencing Error — setconvert","text":"Mean variance associated tau error.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.html","id":null,"dir":"Reference","previous_headings":"","what":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","text":"function used simulate coverage allele biallelic heterozygous sites assuming beta binomial distribution. sample sequence depth truncated poisson distribution set minimum, maximum, lambda. heterozygous sites returned. Based input variables, sites may filtered based total coverage (filter.coverage), allele sequencing coverage (filter.error), allele frequency (filter.freq).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","text":"","code":"sim.ind.BB( mvec, avec, svec, error = 0.001, s.size = 50000, lambda = 11, max.coverage = 20, min.coverage = 2, filter.coverage = TRUE, max.depth.quantile.prob = 0.9, filter.error = TRUE, filter.freq = FALSE, trunc = c(0, 0), sampled = TRUE )"},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","text":"mvec Vector mean values allele frequency. avec Vector alpha values representing proportion expected mean. 
svec Vector variance values. error Sequencing error rate. Default 0.001, low error. s.size Number biallelic sites generate. Defaults 50000. Warning, number sites generated number sites returned due filtering steps. lambda Set lambda truncated poisson distrubtion. Defaults 11. max.coverage Maximum sequencing depth per site. Defaults 20. min.coverage Minimum sequencing depth per site. Defaults 2. filter.coverage Default TRUE. Filters retain sites total sequencing depth greater provided minimum coverage less max quantile depth (set max.depth.quantile.prob). max.depth.quantile.prob Maximum depth quantile probability. Defaults 0.9. filter.error Default TRUE. Filter retain sites allele coverage greater sequencing error rate times total coverage, less one minus sequencing error rate times total coverage. filter.freq Default FALSE. set true, sites filtered based provided trunc. trunc List two values representing lower upper bounds,\\(c_{L}\\) \\(c_{U}\\). Defaults c(0,0) represent truncation. sampled Default TRUE. randomly sample allele allele B, return data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","text":"sampled = FALSE, data frame total coverage, coverage allele , coverage allele B returned. 
sampled = TRUE, data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.tau.html","id":null,"dir":"Reference","previous_headings":"","what":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","text":"function used simulate frequency biallelic heterozygous sites assuming beta-binomial distribution. sample sequence depth truncated poisson distribution set minimum, maximum, lambda. heterozygous sites returned. Based input variables, sites may filtered based total coverage (filter.coverage), allele sequencing coverage (filter.error), allele frequency (filter.freq).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.tau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","text":"","code":"sim.ind.BB.tau( mvec, avec, tau = 0.01, error = 0.001, s.size = 50000, lambda = 11, max.coverage = 20, min.coverage = 2, filter.coverage = TRUE, max.depth.quantile.prob = 0.9, filter.error = TRUE, filter.freq = FALSE, trunc = c(0, 0), sampled = TRUE )"},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.tau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","text":"mvec Vector mean values allele frequency. avec Vector alpha values representing proportion expected mean. tau Overdispersion parameter. Defaults 0.01. error Sequencing error rate. Defaults 0.001. s.size Number biallelic sites generate. Defaults 50000. 
Warning, number sites generated number sites returned due filtering steps. lambda Set lambda truncated poisson distrubtion. Defaults 11. max.coverage Maximum sequencing depth per site. Defaults 20. min.coverage Minimum sequencing depth per site. Defaults 2. filter.coverage Default TRUE. Filters retain sites total sequencing depth greater provided minimum coverage less max quantile depth (set max.depth.quantile.prob). max.depth.quantile.prob Maximum depth quantile probability. Defaults 0.9. filter.error Default TRUE. Filter retain sites allele coverage greater sequencing error rate times total coverage, less one minus sequencing error rate times total coverage. filter.freq Default FALSE. set true, sites filtered based provided trunc. trunc List two values representing lower upper bounds, \\(c_{L}\\) \\(c_{U}\\). Defaults c(0,0) represent truncation. sampled Default TRUE. randomly sample allele allele B, return data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.tau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","text":"sampled = FALSE, data frame total coverage, coverage allele , coverage allele B returned. 
sampled = TRUE, data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.simple.html","id":null,"dir":"Reference","previous_headings":"","what":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","title":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","text":"function used simulate coverage allele biallelic heterozygous sites assuming binomial distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.simple.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","text":"","code":"sim.ind.simple(mvec, cover = 100, s.size = 50000, sampled = TRUE)"},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.simple.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","text":"mvec Vector means. cover Coverage sites. s.size Number biallelic sites generate. Defaults 50000. Warning, number sites generated number sites returned due filtering steps. sampled Default TRUE. randomly sample allele allele B, return data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.simple.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","text":"sampled = FALSE, data frame total coverage, coverage allele , coverage allele B returned. 
sampled = TRUE, data frame total coverage coverage randomly sampled allele returned.","code":""}] +[{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"data-description","dir":"Articles","previous_headings":"","what":"Data description","title":"Basic Example","text":"“inst/extdata/” folder found github, provide three samples Galax urceolata collected 2021 part dissertation. samples published, submitted NCBI-SRA near future. data generated target enrichment species-specific probes. samples include diploid (MLG013), triploid (MLG015), tetraploid (MLG014). raw files created following preprocessing steps outlined ‘Data Preparation’.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"load-packages","dir":"Articles","previous_headings":"","what":"Load packages","title":"Basic Example","text":"tutorial requires nQuack dplyr.","code":"library(nQuack) library(dplyr) library(kableExtra)"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"prepare-data","dir":"Articles","previous_headings":"Data preparation","what":"01. Prepare data","title":"Basic Example","text":"Warning, step takes results file slightly larger input BAM. time taken step can likely reduce future, however, keep file large conducting true filtering steps stage. suggest running one CPU night cluster, though may take multiple days finish depending data.","code":"# Set in and out paths of files inpath <- \"../inst/extdata/01_raw/\" outpath <- \"../inst/extdata/02_prepared/\" # List files in the inpath and remove their ending filelist <- list.files(path = inpath, pattern = \"*.bam\" ) filelist <- gsub(\".bam\", \"\", filelist) for( i in 1:length(filelist)){ prepare_data(filelist[i], inpath, outpath) }"},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"process-data-","dir":"Articles","previous_headings":"Data preparation","what":"02. Process data.","title":"Basic Example","text":"Next , filter data file. 
step fast (2 - 3 seconds per sample) can run locally single CPU. following filtering approach found nQuire: minimum depth 10, allele trunctation minimum 0.15, allele truncation maximum 0.85. filtering like ? accurate model sample set filtering approach using normal distribution implementation alpha free uniform mixture. model filtering approach led 97% accuracy 186 samples ploidal level correctly assigned.","code":"inpathtext <- \"../inst/extdata/02_prepared/\" newfilelist <- list.files(path = inpathtext, pattern = \"*.txt\" ) for(i in 1:length(newfilelist)){ samp <- newfilelist[i] temp <- process_data(paste0(inpathtext, samp), min.depth = 10, max.depth.quantile.prob = 1, error = 0.01, trunc = c(0.15,0.85)) write.csv(temp, file = paste0(\"../inst/extdata/03_processed/\", gsub(\".txt\", \"\", samp), \".csv\"), row.names = FALSE) }"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"explore-all-models","dir":"Articles","previous_headings":"Model inference","what":"Explore all models","title":"Basic Example","text":"Now ready predict ploidal level samples. using method unexplored sample set, suggest examine data least 18 model types three distributions, total 54 models. functions can run multiple cores. run examples, took us 1.46 - 2.09 seconds run quackNormal(), 6.41 - 23.16 min run quackBeta(), 3.12 - 27.85 min run quackBetaBinom(). 
total, took 9.54 46.15 min run models sample.","code":"samples <- c(\"MLG013\", \"MLG014\", \"MLG015\") for(i in 1:length(samples)){ temp <- as.matrix(read.csv(paste0(\"../inst/extdata/03_processed/\", samples[i], \".csv\"))) out1 <- quackNormal(xm = temp, samplename = samples[i], cores = 10, parallel = FALSE) out2 <- quackBeta(xm = temp, samplename = samples[i], cores = 10, parallel = FALSE) out3 <- quackBetaBinom(xm = temp, samplename = samples[i], cores = 10, parallel = FALSE) allout <- rbind(out1, out2, out3) write.csv(allout, file = paste0(\"../inst/extdata/04_output/\", samples[i], \".csv\"), row.names = FALSE) }"},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"model-interpretation","dir":"Articles","previous_headings":"Model inference","what":"Model interpretation","title":"Basic Example","text":"Using function quackit(), can easily interpret model output. selecting models based BIC score considering diploid, triploid, tetraploid mixtures. output sample, can pair outputs key contains sample names ploidal level. identify accurate model data, tally accuracy handy dplyr functions. sample set, know normal distribution alpha free uniform mixture accurate model. 
now ?","code":"inpathtext <- \"../inst/extdata/04_output/\" samples <- c(\"MLG013\", \"MLG014\", \"MLG015\") for(i in 1:length(samples)){ temp <- read.csv(paste0(inpathtext, samples[i], \".csv\")) summary <- quackit(model_out = temp, summary_statistic = \"BIC\", mixtures = c(\"diploid\", \"triploid\", \"tetraploid\")) write.csv(summary, file = paste0(\"../inst/extdata/05_interpret/\", samples[i], \".csv\"), row.names = FALSE) } # Create key key <- data.frame(sample = c(\"MLG013\", \"MLG014\", \"MLG015\"), ploidal.level = c(\"diploid\", \"tetraploid\", \"triploid\")) # Read in quackit() output dfs <- lapply(list.files(\"../inst/extdata/05_interpret/\", full.names = TRUE ), read.csv) alloutput <- do.call(rbind, dfs) # Combined alloutputcombo <- dplyr::left_join(alloutput, key) # Check the accuracy alloutputcombo <- alloutputcombo %>% dplyr::mutate(accuracy = ifelse(winnerBIC == ploidal.level, 1, 0)) ## What distribution and model type should we use? sumcheck <- alloutputcombo %>% group_by(Distribution, Type) %>% summarize(total = n(), correct = sum(accuracy)) kbl(sumcheck) %>% kable_paper(\"hover\", full_width = F)"},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"running-only-the-best-model","dir":"Articles","previous_headings":"Model inference","what":"Running only the best model","title":"Basic Example","text":"unknown samples, can use best model predict ploidal level bestquack() function.","code":"samples <- c(\"MLG013\", \"MLG014\", \"MLG015\") out <- c() for(i in 1:length(samples)){ temp <- as.matrix(read.csv(paste0(\"../inst/extdata/03_processed/\", samples[i], \".csv\"))) out[[i]] <- bestquack(temp, distribution = \"normal\", type = \"fixed\", uniform = 1, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\"), samplename = samples[i]) }"},{"path":"http://mlgaynor.com/nQuack/articles/BasicExample.html","id":"bootstrap-replicates","dir":"Articles","previous_headings":"Model inference","what":"Bootstrap replicates","title":"Basic 
Example","text":"also provide function run bootstrap replicates best model. Warning, print lot ducks. output function includes two rows, first show best model original data set, second tallies bootstrap replicates best model. diploid, see best model replicates diploid! means 100% bootstrap support. can use replicates identify model shouldn’t trusted. example, model known missassign ploidal level MLG129, tetraploid. found 4/1000 bootstrap replicates support correct model. Suggesting deviation one mixture/ploidal level may indicate untrustworthy model. However, likely varies across models sample sets.","code":"samples <- c(\"MLG013\", \"MLG014\", \"MLG015\") bout <- c() for(i in 1:length(samples)){ temp <- as.matrix(read.csv(paste0(\"../inst/extdata/03_processed/\", samples[i], \".csv\"))) bout[[i]] <- quackNboots(temp, nboots = 100, distribution = \"normal\", type = \"fixed\", uniform = 1, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\"), samplename = samples[i]) } write.csv(bout[[1]], file = \"../inst/extdata/06_boots/MLG013-boots.csv\", row.names = FALSE) write.csv(bout[[2]], file = \"../inst/extdata/06_boots/MLG014-boots.csv\", row.names = FALSE) write.csv(bout[[3]], file = \"../inst/extdata/06_boots/MLG015-boots.csv\", row.names = FALSE) MLG013boot <- read.csv(\"../inst/extdata/06_boots/MLG013-boots.csv\") MLG013boot ## diploid triploid tetraploid sample ## 1 1 NA NA MLG013 ## 2 100 NA NA MLG013 temp <- as.matrix(read.csv(\"../inst/extdata/06_boots/MLG129.csv\")) check <- quackNboots(temp, nboots = 1000, distribution = \"normal\", type = \"fixed\", uniform = 1, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\"), samplename = \"MLG129\") write.csv(check, file = \"../inst/extdata/06_boots/MLG129-boots.csv\", row.names = FALSE) MLG129boot <- read.csv(\"../inst/extdata/06_boots/MLG129-boots.csv\") MLG129boot ## diploid triploid tetraploid sample ## 1 1 NA NA MLG129 ## 2 996 NA 4 
MLG129"},{"path":"http://mlgaynor.com/nQuack/articles/BiasEstimation.html","id":"simulated-data","dir":"Articles","previous_headings":"","what":"Simulated Data","title":"Bias Estimation","text":"simulated 2000 data sets ploidal level. ploidal level, set simulated proportions, mean, variance equal mixture models starting parameters. simulated 25000 sites data set, 100 replicates per parameter set. included 20 parameter sets differed coverage. distribution coverage simulated data sets can seen :","code":""},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"align-to-a-reference-genome","dir":"Articles","previous_headings":"Preprocessing","what":"Align to a reference genome","title":"Data Preparation","text":"processing raw sequence data, align reads reference genome. use bwa-mem2 align reads. converted SAM files BAM files using samtools. bash script:","code":"# Load module module load bwa-mem2/2.2.1 module load samtools/1.15 # Index the reference bwa-mem2 index reference_genome.gz # Mapping ## -t Number of threads, here we run our mapping on 10 threads. ## -M Indexed reference genome. bwa-mem2 mem -t 10 -M reference_genome.gz sample_001_1.fastq sample_001_2.fastq > sample_001.sam # SAM to BAM samtools view -S -b sample_001.sam > sample_001.bam # SORT samtools sort sample_001.bam -o sample_001.bam"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"identify-repeat-regions","dir":"Articles","previous_headings":"Preprocessing > Remove repeats","what":"Identify repeat regions","title":"Data Preparation","text":"Prior running samples nQuack, suggest repeat regions removed. remove repeats, first identify . utilize repeat modeler repeat masker identify mask repeats. 
also make database mask repeats, used remove repeats samples alignment.","code":"## Load modules module load repeatmodeler/2.0 module load repeatmasker/4.1.1 # Set database name databasename=Species # Build database based on the reference genome BuildDatabase -name $databasename ReferenceGenome.fasta # Repeat Modeler ## -LTRStruct = runs the LTR structural discovery pipeline ( LTR_Harvest and LTR_retreiver ) ### and combine results with the RepeatScout/RECON pipeline. RepeatModeler -pa 36 -database $databasename -LTRStruct > out.log # Move and gzip the database created mv RM* 01_RepMod cd 01_RepMod tar cvzf RepMod_rounds.tar.gz round-* LTR* tmp* rm -r round-* LTR* tmp* gzip families* cd .. mv $databasename* 01_RepMod/ cp ReferenceGenome.fasta $databasename.fasta # Repeat Masker ## -pa # of threads ## -a return alignment ## -xsmall returns with masked lowercased ## -gff creates the gene feature finding formatted output ## -lib indicates the library, alternatively you could indicate the species ## -dir indicates output directory ## .fasta = reference genome RepeatMasker -pa 24 -a -xsmall -gff -lib 01_RepMod/consensi.fa.classified -dir 02_RepMask $databasename.fasta # Prepare for use mkdir 03_database cut -f1,4,5 02_RepMask/$databasename.fasta.out.gff | perl -pi -e 's/^#.*\\n//g' > 03_database/ref2_$databasename.gff.bed"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"remove-repeats-from-your-alignment","dir":"Articles","previous_headings":"Preprocessing > Remove repeats","what":"Remove repeats from your alignment","title":"Data Preparation","text":"Based bed file created , remove repeats alignment samtools function veiw.","code":"# Load modules module load samtools/1.12 # Make directories mkdir repeats_removed ## Remove Repeats samtools view sample_001.bam -b -h -o /dev/null -U repeats_removed/sample_001.bam -L 
03_database/ref2_$databasename.gff.bed"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"filter-low-quality","dir":"Articles","previous_headings":"Preprocessing","what":"Filter low quality","title":"Data Preparation","text":"remove regions low quality based MAPQ score. MAPQ equal -10*log-base-10(Pr(mapping position wrong)), rounded nearest integer (see ). example, wanted remove site 50% chance mapped wrong position, set filter 4. calculation MAPQ score depends alignment software ( see ), therefore difficult pinpoint score needed remove reads map multiple locations. take stringent approach remove sites 10% chance mapped wrong location set -q flag 10.","code":"ceiling(-10*log10(0.5)) #> [1] 4 # Load modules module load samtools/1.15 # Make directories mkdir filtered ## Remove Repeats samtools view -b -q 10 repeats_removed/sample_001.bam > filtered/sample_001.bam"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"step-1-prepare-","dir":"Articles","previous_headings":"Data processing with nQuack","what":"Step 1: Prepare.","title":"Data Preparation","text":"running model, data must converted tab-seperated text file. , wrote custom function prepare_data() convert BAM file tab-seperated text file samtools. 
function, must supply filename (without .bam ending), path directory containing BAM files, path directory want processed files saved.","code":"## Prepare many samples inpath <- \"filtered/\" outpath <- \"Processed/\" filelist <- list.files(path = inpath, pattern = \"*.bam\" ) filelist <- gsub(\".bam\", \"\", filelist) for( i in 1:length(filelist)){ prepare_data(filelist[i], inpath, outpath) }"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"warning-samtools-must-be-local","dir":"Articles","previous_headings":"Data processing with nQuack > Step 1: Prepare.","what":"Warning: samtools must be local!","title":"Data Preparation","text":"working personal computer, just make sure samtools installed callable “samtools”. working cluster, may need install samtools locally. Though location install may differ, installed samtools locally UF’s amazing HiPerGator slurm cluster:","code":"mkdir bin cd bin ln -s /apps/samtools/1.15/bin/samtools samtools"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"step-2-process","dir":"Articles","previous_headings":"Data processing with nQuack","what":"Step 2: Process","title":"Data Preparation","text":"bam converted tab-seperated text file, must process read R environment. function, provide three options filtering data: Total coverage can filtered based minimum sequencing depth maximum sequencing depth quantile probability (coverage falls max.depth.quantile.prob removed). Allele coverage can filtered based sequencing error rate, coverage allele must total coverage times error rate: \\(Coverage() > Coverage(+ B)*(error)\\), less total coverage times one minus error rate: \\(Coverage() < Coverage(+ B)*(1-error)\\) . Finally, sites may filtered based calculated allele frequency, removing sites lower bound, \\(C_{L}\\) , upper bound , \\(C_{U}\\) . Allele frequency can filtered based minimum maximum allele frequency. Finally, avoid data duplication, randomly sample allele equal probability site. 
resulting data set includes total coverage per site coverage associated randomly sampled allele.","code":"(1) Total coverage filter (2) Allele coverage filter (3) Allele frequency filter ## Prepare many samples textfiles <- list.files(path = \"Processed/\", pattern = \"*.txt\", full.name = FALSE) for(i in 1:length(textfiles)){ temp <- process_data(paste0(\"Processed/\", textfiles[i]), # File with full location min.depth = 2, # Total coverage gilter max.depth.quantile.prob = 0.9, # Total coverage filter error = 0.01, # Allele Coverage Filter trunc = c(0,0)) # Allele Frequency Filter assign((gsub(\".txt\", \"\", textfiles[i])), temp) }"},{"path":[]},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"coverage-filters-","dir":"Articles","previous_headings":"Data processing with nQuack > Step 2: Process > When should you filter your data?","what":"Coverage filters.","title":"Data Preparation","text":"Increasing decreasing minimum maximum coverage filter parameters might necessary data set. Total coverage can inspected based output process_data(), call output xm. x-axis coverage histogram goes way targeted sequencing depth, need decrease maximum depth quantile probability.","code":"# Plot hist(xm[,1]) ## Error cutoffs ### If I increase the sequence error rate, how many sites will likely be removed? new.e <- 0.02 # 2 sites out of every 100 removes <- c() for(i in 1:nrow(xm)){ if(xm[i,2] < (xm[i,1]*new.e) | xm[i,2] > (xm[i,1]*(1-new.e))){ removes[i] <- 1 }else{ removes[i] <- 0 } } sum(removes)"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"allele-frequency","dir":"Articles","previous_headings":"Data processing with nQuack > Step 2: Process > When should you filter your data?","what":"Allele Frequency","title":"Data Preparation","text":"Previous methods automatically truncated data allele frequency, removing site frequencies 0.1 0.9. first pass, suggest truncate data. However, data processed, plot data inspect. 
Phytophthora infestans (ENA:ERR1990235, Triploid), processed data truncation: Notice U-shaped ends? Well removed prior ploidal estimation. simply set trunc = c(0.15,0.85) reinspected data: One last note - expect 6x samples, careful truncation, mean one mixtures 0.16 hexaploid.","code":"# Convert to allele frequency xi <- xm[,2]/xm[,1] # Plot hist(xi)"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"comparing-nquack-to-nquire","dir":"Articles","previous_headings":"Data processing with nQuack","what":"Comparing nQuack to nQuire","title":"Data Preparation","text":"prepare data nQuack, use samtools mpileup, similarly nQuire, removes reads align PCR duplicates (samtool flags: BAM_FUNMAP BAM_FDUP). default, nQuire removes sites coverage less 10, well sites frequency less 0.2 0.8. allow users modify parameters text file creater rather default arbitrary cutoffs. mimic nQuire’s defaults triploid mentioned (Phytophthora infestans, ENA:ERR1990235). nQuack, can replicate nQuire data frame desired. happen like nQuire’s data preparation , uses data program. processing samples nQuire’s create view functions, resulting txt file can read R. prepare data frame nQuack, reduce three column data frame two columns randomly sampling allele B every site. created function help .","code":"# Read in nQuire txt file df <- process_nquire(\"file.txt\")"},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"what-about-denoising","dir":"Articles","previous_headings":"Data processing with nQuack","what":"What about ‘denoising’?","title":"Data Preparation","text":"Noise can get way model selection, identifying data real noise difficult. filter allele frequencies normal + uniform mixture model. nQuack uses scaled probability data point belonging mixture model, inferred expected maximization algorithm. remove allele frequencies probability belonging uniform mixture higher probability belonging mixture. 
also implement nQuire’s denoise method , utilizes inferred alpha parameter histogram base frequencies filter data. method without faults. Notably, method poor job truncating allele frequencies needs done prior attempting method. example, triploid mentioned (Phytophthora infestans, ENA:ERR1990235), truncated c(0.1, 0.9) peak toward right still retained. However, truncate inital data set c(0.15, 0.85), much cleaner data set returned denoise_data() function:","code":""},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"alternative-approach---bclean","dir":"Articles","previous_headings":"Data processing with nQuack > What about ‘denoising’?","what":"Alternative approach - Bclean","title":"Data Preparation","text":"shape scale parameters associated beta distribution less 1, distribution U-shaped. can leverage U-shaped distribution remove noise. utilize beta distribution three mixtures representing cytotypes included nQuack two mixtures representing U-shaped distribution. constrained first three mixtures shape scale parameters 1, last two mixtures shape scale constrained less 1. implementation expected maximization, utilizes scaled probability data point belonging mixture model remove sites probability belonging U-shaped mixture higher probability belonging mixture. Due computational time needed run expected maximization algorithm, default, simple calculate probability matrix E-step run complete algorithm. great alternative allele truncation. 
example Bclean() function applied nQuire’s samples including Phytophthora infestans diploid (99189) triploid (88069), Saccharomyces cerevisiae diploid (SRR3265396), triploid (SRR3265389), tetraploid (SRR3265401): method can also applied truncation catch extra peak:","code":""},{"path":"http://mlgaynor.com/nQuack/articles/DataPreparation.html","id":"bclean-and-denoise","dir":"Articles","previous_headings":"Data processing with nQuack > What about ‘denoising’?","what":"Bclean and Denoise","title":"Data Preparation","text":"can also add two functions together, however may reduce data set dramatically impact model inference thoroughly explored. Now ready run nQuack!","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"site-based-heterozygosity","dir":"Articles","previous_headings":"","what":"Site-based heterozygosity","title":"Model Options","text":"biallelic site diploid allele’s B, expect sequence alleles equal proportions 50% sequences representing 50% sequences representing B. simple idea foundation site-based heterozygosity methods. approaches based biallelic single nucleotide polymorphisms within individual expected number copies allele.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"nquack","dir":"Articles","previous_headings":"","what":"nQuack","title":"Model Options","text":"Like previous site-based heterozygosity approaches predict ploidal level, use mixture model approach estimate likely ploidal level. nQuack provides multiple implementations expectation maximization (EM) algorithm, including implementations : normal distribution mimics nQuire, corrected normal distribution, beta distribution, beta-binomial distribution. implementations provided without uniform mixtures, total eight implementations. 
details available implementations use .","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"model-types","dir":"Articles","previous_headings":"nQuack","what":"Model types","title":"Model Options","text":"nQuire’s implementation expectation maximization normal distribution two types models: “free” variable estimated “fixed” variance estimated. nQuire calculate log-likelihood ratio free fixed models identify sample diploid, triploid, tetraploid. simplest approach expand higher ploidal levels just add distribution fixed mixtures examined. However, mixture models additional components might helpful predicting ploidal level. figure shows basic components mixture model include mean (\\(\\mu\\)), variance (\\(\\sigma\\)), proportion (alpha, \\(\\alpha\\)). components, expectations mean, expected frequencies allele, seen table . know proportions may differ allopolyploid compared autopolyploid, interested exploring models alpha free. ended coding implementation provide estimates parameters free (type = 'free'), alpha free (type = 'fixed'), alpha variance free (type = 'fixed_2'), variance free (type ='fixed_3). nQuire’s fixed models predict variance resemble type = 'fixed_3' implementation.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"additional-arguments-","dir":"Articles","previous_headings":"nQuack","what":"Additional arguments.","title":"Model Options","text":"expectation maximization function, starting parameter list (parm.list) must provided includes avec, mvec, svec, represent \\(\\alpha\\), \\(\\mu\\), \\(\\sigma\\) mixture. length input vector equal, unless distribution includes uniform mixture, avec include value uniform distribution. values must greater 0 less equal 1, sum(avec) equal 1. normal beta distributions, allele frequency provided (xi), however beta-binomial mixtures expect total coverage coverage randomly sampled allele (xm). 
Convergence algorithm occurs set niters iterates, difference augmented log-likelihood values less epsilon. also allow truncation probability density function match allele frequency truncation may done data cleaning (trunc). notes: equivalent nQuire model emstepNU() function. Implementations Nelder-Mead numerical optimization unable estimate single alpha value, therefore diploid mixtures estimated emstepB(type = \"fixed\")emstepBB(type = \"fixed\") log-likelihood calculated based sum probability density functions mixtures. beta beta-binomial use Nelder-Mead numerical optimization, computationally ‘slow’. provide packaged functions apply methods . Expectation maximization implementations can found : Normal: emstepNA() Normal uniform: emstepNUA() nQuire’s normal: emstepN() nQuire’s normal uniform: emstepNU() Beta: emstepB() Beta uniform: emstepBU() Beta-binomial: emstepBB() Beta-binomial uniform: emstepBBU()","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"mixture-model-implementation-","dir":"Articles","previous_headings":"","what":"Mixture model implementation.","title":"Model Options","text":"expect anyone program starting parameters needed run mixture model, designed functions help.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"implementation","dir":"Articles","previous_headings":"Mixture model implementation.","what":"Implementation","title":"Model Options","text":"quackNormal() quackNormalNQ() quackBeta() quackBetaBinom() functions can run 32 mixture models . matrix total coverage coverage randomly sampled allele (xm) supplied functions. functions can run parallel resources avaliable. input parameters mixture based table variance equal 0.01 alpha divided equally among expected frequencies. example, triploid: mvec = c(0.33, 0.67), avec=c(0.50, 0.50), svec=c(0.01, 0.01). uniform mixture included, allocate 0.1 avec mixture, therefore, avec = c(0.45, 0.45, 0.1). 
function, input mean variance can transformed based sequencing error rate sequencing overdispersion parameter (see supplemental methods information). Variance can also set 0.001. bestquack() Sometimes may want run 32 mixture models, created function lets pick distribution, type, mixtures included.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/ModelOptions.html","id":"interpretation","dir":"Articles","previous_headings":"Mixture model implementation.","what":"Interpretation","title":"Model Options","text":"Based functions, users able obtain BIC scores, log-likelihood, log-likelihood ratios (\\(\\Delta\\log{L}\\)) mixture. BIC \\(\\Delta\\log{L}\\), lowest value likely model. Based log-likelihood, highest value likely model. recommend using BIC penalized sample size number parameters estimated. BIC lead less errors model selection (see Dennis et al. 2019). select best model based model outputs, provide function quackit().","code":""},{"path":"http://mlgaynor.com/nQuack/articles/Outliers.html","id":"simulate-data","dir":"Articles","previous_headings":"","what":"Simulate Data","title":"Outliers","text":"Following Figure S2 WeiB et al. 2018 (nQuire), simulate 10 sites binomial distribution probability 0.5 coverage equal 100. 
data set outlier, simulate single site coverage equal 400.","code":"dipNorm <- data.frame(matrix(ncol = 3, nrow =10)) for(i in 1:10){ dipNorm[i,1] <- 100 dipNorm[i,2] <- rbinom(n = 1, size = 100, prob = 0.5) dipNorm[i,3] <- dipNorm[i,1] - dipNorm[i,2] } dipBias <- data.frame(matrix(ncol = 3, nrow =10)) coverage <- c(rep(100, 9), 400) prob <- c(rep(0.5, 9), 0.5) for(i in 1:10){ dipBias[i,1] <- coverage[i] dipBias[i,2] <- rbinom(n = 1, size = coverage[i], prob = prob[i]) dipBias[i,3] <- dipBias[i,1] - dipBias[i,2] }"},{"path":"http://mlgaynor.com/nQuack/articles/Outliers.html","id":"comparing-the-log-likelihood","dir":"Articles","previous_headings":"","what":"Comparing the Log-Likelihood","title":"Outliers","text":"calculate simple log-likelihood given expected parameter values diploid triploid model. cases, found diploid likely model simulated data. Unlike simulations nQuire, find dramatic effect log-likelihood calculations outliers. attempted partition data match Figure S2 WeiB et al. 2018, however, never create binomial distribution likely model triploid. However, allele frequency high coverage outlier deviates expected (case, set probability 0.2), see log-likelihood binomial normal distributions greatly effected outlier. However, find diploid model likely cases. Noteably, see small shifts log-likelihood associated beta beta-binomial distributions.","code":""},{"path":"http://mlgaynor.com/nQuack/articles/Outliers.html","id":"tldr","dir":"Articles","previous_headings":"","what":"TLDR","title":"Outliers","text":"beta beta-binomial distributions look promising. 
find dramatic effect log-likelihood calculations outliers previously reported.beta beta-binomial distributions","code":""},{"path":"http://mlgaynor.com/nQuack/articles/SimulateData.html","id":"simple-or-idealisitic","dir":"Articles","previous_headings":"","what":"Simple or Idealisitic","title":"Simulate Data","text":"simple approach simulating data can done sim.ind.simple() coverage equal among sites counts allele sampled binomial distribution. example, want simulate 5000 sites 100x coverage diploid triploid, following: creates beautiful plots! However, reality look like data collect.","code":"dip <- sim.ind.simple(mvec = 0.5, cover = 100, s.size = 5000, sampled = TRUE) tri <- sim.ind.simple(mvec = c(0.33, 0.67), cover = 100, s.size = 5000, sampled = TRUE) par(mfrow=c(1,2)) hist(dip[,2]/dip[,1], main = \"Diploid\", xlab = \"Allele Frequency\", xlim = c(0,1)) hist(tri[,2]/tri[,1], main = \"Triploid\", xlab = \"Allele Frequency\", xlim = c(0,1))"},{"path":"http://mlgaynor.com/nQuack/articles/SimulateData.html","id":"advanced-or-realistic","dir":"Articles","previous_headings":"","what":"Advanced or Realistic","title":"Simulate Data","text":"simulate realistic data, two ‘advance’ options: sim.ind.BB() sim.ind.BB.tau(). functions, total coverage site sampled truncated poisson distribution (Pfenninger et al. 2022), rtrunc function truncdist. Given randomly selected proportion (.e. mean associated variance), copies allele defined binomial sample probability defined beta distribution (.e. beta-binomial) copies allele B equal remainder. Data filtered remove homozygous sites one allele sampled chance. filtered sites based total coverage sequencing coverage allele. function can also filter sites based truncated allele frequencies. Finally, randomly sample allele equal probability site. resulting data set includes total coverage per site coverage associated randomly sampled allele. 
want use one advance functions simulate similar scenario , might look something like : ploidal level samples definitely clear cut !","code":"dip.adv <- sim.ind.BB(mvec = 0.5, svec = 0.1, avec = 1.0, error = 0.001, s.size = 5000, max.coverage = 100, min.coverage = 50, lambda = 75) tri.adv <- sim.ind.BB(mvec = c(0.33, 0.67), avec = c(0.5, 0.5), svec = c(0.01, 0.01), error = 0.001, s.size = 5000, max.coverage = 100, min.coverage = 50, lambda = 75) par(mfrow=c(1,2)) hist(dip.adv[,2]/dip.adv[,1], main = \"Diploid - Advanced\", xlab = \"Allele Frequency\", xlim = c(0,1)) hist(tri.adv[,2]/tri.adv[,1], main = \"Triploid - Advanced\", xlab = \"Allele Frequency\", xlim = c(0,1))"},{"path":"http://mlgaynor.com/nQuack/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Michelle L. Gaynor. Author, maintainer. José Miguel Ponciano. Contributor.","code":""},{"path":"http://mlgaynor.com/nQuack/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Gaynor M, Landis J, O'Connor T, Laport R, Doyle J, Soltis D, Ponciano J, Soltis P (2024). “nQuack: R package predicting ploidal level sequence data using site-based heterozygosity.” Applications Plant Sciences, 12(4), e11606. doi:10.1002/aps3.11606.","code":"@Article{, title = {nQuack: An R package for predicting ploidal level from sequence data using site-based heterozygosity}, year = {2024}, journal = {Applications in Plant Sciences}, doi = {10.1002/aps3.11606}, pages = {e11606}, volume = {12}, number = {4}, author = {Michelle L. Gaynor and Jacob B. Landis and Timothy K. O'Connor and Robert G. Laport and Jeff J. Doyle and Douglas E. Soltis and José Miguel Ponciano and Pamela S. 
Soltis}, }"},{"path":"http://mlgaynor.com/nQuack/index.html","id":"nquack","dir":"","previous_headings":"","what":"Predicting ploidal level from sequence data using site-based heterozygosity ","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"Michelle L. Gaynor, Jacob B. Landis, Tim K. O’Connor, Robert G. Laport, Jeff J. Doyle, Douglas E. Soltis, José Miguel Ponciano, Pamela S. Soltis","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"overview","dir":"","previous_headings":"","what":"Overview","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"nQuack modified statistical framework predict ploidy level based sequence data. build upon Weib et al., 2018 Gaussian Mixture Model approach estimate ploidy level, originally written C executable.","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"more-on-nquack","dir":"","previous_headings":"","what":"More on nQuack","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"provided expanded tools implementations improve site-based heterozygosity inferences ploidal level. nQuack provides data preparation guidance tools decrease noise input data. include maximum sequence coverage quantile filter sequence error-based filter, remove biallelic sites likely representative copy number variance nuclear genome. also consider frequency allele B site, instead , found methods. learn best practices, see Data Preparation guide. model improves upon nQuire framework extending higher ploidal levels (pentaploid hexaploid), correcting augmented likelihood calculation, implementing suitable distribution, allowing additional ‘fixed’ models. also decrease model selection errors relying BIC rather likelihood ratio tests. learn methods, see Model Options guide. 
provide 32 ways estimates likelihood mixture models expectation maximization algorithm (see ) - 8 expectation maximization implementations 4 model types . total, nQuack offers 128 models.","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"evaluation-of-nquack","dir":"","previous_headings":"","what":"Evaluation of nQuack","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"examine utility method, examined 513,792 models based simulated real samples. using method, suggest read manuscript consider many limitations pattern-based approach determining ploidal level.","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"","code":"install.packages(\"devtools\") devtools::install_github(\"mgaynor1/nQuack\")"},{"path":"http://mlgaynor.com/nQuack/index.html","id":"warning-samtools-must-be-local","dir":"","previous_headings":"Installation","what":"Warning: samtools must be local!","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"working personal computer, make sure samtools installed callable “samtools” via terminal. working cluster, may need symbolically-link samtools locally. Though location install may differ, make samtools callable locally UF’s amazing HiPerGator slurm cluster: implementation, see Basic Example article.","code":"mkdir bin cd bin ln -s /apps/samtools/1.15/bin/samtools samtools"},{"path":"http://mlgaynor.com/nQuack/index.html","id":"references","dir":"","previous_headings":"","what":"References","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"Gaynor ML, Landis JB, O’Connor TK, Laport RG, Doyle JJ, Soltis DE, Ponciano JM, Soltis PS. 2024. nQuack: R package predicting ploidy level sequence data using site-based heterozygosity. 
Applications Plant Sciences 12(4):e11606. doi: 10.1002/aps3.11606","code":""},{"path":"http://mlgaynor.com/nQuack/index.html","id":"up-next","dir":"","previous_headings":"","what":"Up Next:","title":"Predicting ploidal level from sequence data using site-based heterozygosity ","text":"sequence data known plodial level mixed-ploidy system, let us know. love collaborate . included v2.0, please send email shellyleegaynor gmail.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/Bclean.html","id":null,"dir":"Reference","previous_headings":"","what":"Remove noise with the beta distribution — Bclean","title":"Remove noise with the beta distribution — Bclean","text":"filter allele frequencies beta mixture model contains 5 mixtures: three mixtures representing cytotypes included nQuack two mixtures representing U-shaped distribution. constrained first three mixtures shape scale parameters 1, last two mixtures shape scale constrained less 1. implementation expectation-maximization, utilizes scaled probability data point belonging mixture model remove site probability belonging U-shaped mixture higher probability belonging mixture. Due computational time needed run expectation-maximization algorithm, default, simple calculate probability matrix E-step run complete algorithm.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/Bclean.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Remove noise with the beta distribution — Bclean","text":"","code":"Bclean(xm, plot = TRUE, quick = TRUE)"},{"path":"http://mlgaynor.com/nQuack/reference/Bclean.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Remove noise with the beta distribution — Bclean","text":"xm Matrix total coverage coverage randomly sampled allele. plot Default TRUE. plots share y-axis, careful interpretation key. Warning, nothing removed, plot removed data missing. quick Default TRUE. 
set FALSE, expectation-maximization algorithm run full.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/Bclean.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Remove noise with the beta distribution — Bclean","text":"Numeric matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalc.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","title":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","text":"Calculate Alpha Beta Mean Variance","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","text":"","code":"alphabetacalc(mu, var)"},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","text":"mu Mean. var Variance.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate Alpha and Beta from Mean and Variance — alphabetacalc","text":"Numeric vector alpha beta.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctau.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctau","title":"Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctau","text":"Calculate Alpha Beta Mean, Tau, Error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate Alpha and Beta from Mean, Tau, and Error rate. 
— alphabetacalctau","text":"","code":"alphabetacalctau(mu, tau, error)"},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctau","text":"mu Mean. tau Overdispersion parameter. Ranges 0 1, 0 indicates less overdispersion 1 indicates high overdispersion. tau must greater 0. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctau","text":"Numeric vector alpha beta.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctauvec.html","id":null,"dir":"Reference","previous_headings":"","what":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","title":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","text":"Vector-based - Calculate Alpha Beta Mean, Tau, Error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctauvec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","text":"","code":"alphabetacalctauvec(mu, tau, error)"},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctauvec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","text":"mu Vector mean. tau Overdispersion parameter. Ranges 0 1, 0 indicates less overdispersion 1 indicates high overdispersion. tau must greater 0. error Sequencing error rate. 
Ranges 0 1.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalctauvec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Vector-based - Calculate Alpha and Beta from Mean, Tau, and Error rate. — alphabetacalctauvec","text":"Numeric matrix alpha beta.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalcvec.html","id":null,"dir":"Reference","previous_headings":"","what":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","title":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","text":"Vector-based - Calculate Alpha Beta Mean Variance","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalcvec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","text":"","code":"alphabetacalcvec(mu, var)"},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalcvec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","text":"mu Vector mean. 
var Vector variance.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/alphabetacalcvec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Vector-based - Calculate Alpha and Beta from Mean and Variance — alphabetacalcvec","text":"Numeric matrix alpha beta.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bdBICcalc.html","id":null,"dir":"Reference","previous_headings":"","what":"BIC Calculations - Bad data — bdBICcalc","title":"BIC Calculations - Bad data — bdBICcalc","text":"function used model selection calculate BIC uniform distribution, represent data discernible pattern.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bdBICcalc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"BIC Calculations - Bad data — bdBICcalc","text":"","code":"bdBICcalc(xi)"},{"path":"http://mlgaynor.com/nQuack/reference/bdBICcalc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"BIC Calculations - Bad data — bdBICcalc","text":"xi Vector allele frequencies.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bdBICcalc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"BIC Calculations - Bad data — bdBICcalc","text":"BIC score bad data model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bestquack.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","title":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","text":"function made run subset models based selected distribution type. many limitations function make tractable, 128 models run package. 
include models comparisons found unhelpful, includes nQuire implementation log-likelihood ratio tests.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bestquack.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","text":"","code":"bestquack( xm, distribution, type, uniform, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\"), samplename, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA )"},{"path":"http://mlgaynor.com/nQuack/reference/bestquack.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","text":"xm Matrix two columns total coverage coverage randomly sampled allele. distribution May set normal, beta, beta-binomial. include implementation nQuire. type May equal fixed, fixed_2, fixed_3. uniform equal 1, uniform mixture included. equal 0, uniform mixture included. mixtures Defaults c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\"). samplename Name sample included output. trunc List two values representing lower upper bounds allele frequency truncation ,\\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. 
, variance default equal 0.01 0.001.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bestquack.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Choose your distribution and type — bestquack","text":"BIC scores log-likelihood (LL) included mixture models. BIC, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap Model Selection - Helper Function - Beta — bootB","title":"Bootstrap Model Selection - Helper Function - Beta — bootB","text":"Bootstrap Model Selection - Helper Function - Beta Bootstrap Model Selection - Helper Function - Beta-Binomial","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap Model Selection - Helper Function - Beta — bootB","text":"","code":"bootB(pset, xi, trunc, boots, niter = 1000L, epsilon = 0.1) bootBB(pset, xm, trunc, boots, niter = 1000L, epsilon = 0.1)"},{"path":"http://mlgaynor.com/nQuack/reference/bootB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap Model Selection - Helper Function - Beta — bootB","text":"pset list 5 sets model parameters, list alpha, mean, variance. xi List observations, case allele frequencies. trunc List two values representing lower upper bounds, $c_L$ $c_U$. boots Number bootstraps conduct. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. 
xm Matrix first column total coverage second count base B.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap Model Selection - Helper Function - Beta — bootB","text":"Summary statistics including Log-likelihood BIC score, well model checks. model checks include abcheck, verifies fit parameters suggest U-shaped distribution. vector \"converge\" also returned, contains number iterates convergence. number iterates surpasses niters parameter, model converge. returned matrix, first row represents true data following rows represent permutations data. Summary statistics including Log-likelihood BIC score, well model checks. model checks include abcheck, verifies fit parameters suggest U-shaped distribution. vector \"converge\" also returned, contains number iterates convergence. number iterates surpasses niters parameter, model converge. returned matrix, first row represents true data following rows represent permutations data.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapB","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapB","text":"function used model selection Beta mixed model. include haploid BIC calculation function. 
Warning, can slow.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapB","text":"","code":"bootstrapB(xm, trunc, boots, return = \"complex\")"},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. boots Number bootstrap replications run. return much information return. Default \"complex\". numbers scare , pick \"simple\"!","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapBB","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapBB","text":"function used model selection Beta mixed model. include haploid BIC calculation function. Warning, can slow.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapBB","text":"","code":"bootstrapBB(xm, trunc, boots, return = \"complex\")"},{"path":"http://mlgaynor.com/nQuack/reference/bootstrapBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap - Mixture Model Selection Test - Beta Distribution — bootstrapBB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. 
trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. boots Number bootstrap replications run. return much information return. Default \"complex\". numbers scare , pick \"simple\"!","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquack.nQuire.html","id":null,"dir":"Reference","previous_headings":"","what":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","text":"function mimics nQuire calculates delta log-likelihood (likelihood ratio) based normal-uniform mixtures model types (free fixed).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquack.nQuire.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","text":"","code":"deltaquack.nQuire( xm, trunc = c(0, 0), type = \"alpha\", dup = FALSE, free = \"all\", return = \"both\" )"},{"path":"http://mlgaynor.com/nQuack/reference/deltaquack.nQuire.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. type Sets free parameter fixed models. nQuire, variance free fixed models. allow free parameter fixed model : 'variance', 'alpha' (proportions mixture), '' (variance & alpha). Options: 'variance', 'alpha', ''. dup setting dup = TRUE, data used mixture model include allele frequency allele allele B. form data dredging: single site, frequency frequency B add one (+ B = 1), therefore represent data point. Default = FALSE. 
free function originally built make sure understood nQuire calculations, therefore provide free model nQuire mixtures representing diploids, triploids, tetraplpoids. default, set free model represent \"\" ploidal levels tested nQuack. Options: \"nQuire\" \"\". return Indicates values return. Options: 'deltaLL' (delta log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquack.nQuire.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquack.nQuire","text":"Delta log-likelihood BIC scores fixed models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackB.html","id":null,"dir":"Reference","previous_headings":"","what":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","title":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","text":"function can used estimate likely ploidal level using beta distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","text":"","code":"deltaquackB( xm, trunc = c(0, 0), cores = NA, type = \"alpha\", free = \"BUM\", return = \"both\" )"},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. 
type Sets free parameter fixed models. nQuire, variance free fixed models. allow free parameter fixed model : 'variance', 'alpha' (proportions mixture), '' (variance & alpha). Options: 'variance', 'alpha', ''. free Indicates free model utilize. Defaults 'BUM', beta-uniform model. Options: 'BUM' (Beta-uniform mixture), 'BM' (Beta mixture). return Indicates values return. Options: 'deltaLL' (delta log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Likelihood Ratio Tests - Mixture Model - Beta Distribution — deltaquackB","text":"Delta log-likelihood BIC scores fixed models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","title":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","text":"function can used estimate likely ploidal level using bet","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","text":"","code":"deltaquackBB( xm, trunc = c(0, 0), cores = NA, type = \"alpha\", free = \"BUM\", return = \"both\", error = NULL, tau = NULL )"},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. 
trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. type Sets free parameter fixed models. nQuire, variance free fixed models. allow free parameter fixed model : 'variance', 'alpha' (proportions mixture), '' (variance & alpha). Options: 'variance', 'alpha', ''. free Indicates free model utilize. Defaults 'BUM', beta-uniform model. Options: 'BUM' (Beta-uniform mixture), 'BM' (Beta mixture). return Indicates values return. Options: 'deltaLL' (delta log-likelihood) 'BIC' (bayesian information criterion) ''. error Sequencing error rate. tau Sequence overdispersion parameter read counts.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackBB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Likelihood Ratio Tests - Mixture Model - Beta-Binomial Distribution — deltaquackBB","text":"Delta log-likelihood BIC scores fixed models including diploid, triploid, tetraploid, pentaploid, hexaploid. 
, smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackN.html","id":null,"dir":"Reference","previous_headings":"","what":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","text":"function mimics nQuire calculates delta log-likelihood (likelihood ratio) based normal-uniform mixtures.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackN.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","text":"","code":"deltaquackN(xm, trunc = c(0, 0), type = \"alpha\", dup = FALSE, return = \"both\")"},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackN.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. type Sets free parameter fixed models. nQuire, variance free fixed models. allow free parameter fixed model : 'variance', 'alpha' (proportions mixture), '' (variance & alpha). Options: 'variance', 'alpha', ''. dup setting dup = TRUE, data used mixture model include allele frequency allele allele B. form data dredging: single site, frequency frequency B add one (+ B = 1), therefore represent data point. Default = FALSE. return Indicates values return. 
Options: 'deltaLL' (delta log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/deltaquackN.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Likelihood Ratio Tests - Mixture Model - Normal Distribution — deltaquackN","text":"Delta log-likelihood BIC scores fixed models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/denoise_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Denoise Data — denoise_data","title":"Denoise Data — denoise_data","text":"filter allele frequencies normal + uniform mixture model. nQuack utilizes scaled probability data point belonging mixture model, inferred expectation maximization algorithm. remove allele frequencies probability belonging uniform mixture higher probability belonging mixture. also implement nQuire's denoise method , utilizes inferred alpha parameter histogram base frequencies filter data.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/denoise_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Denoise Data — denoise_data","text":"","code":"denoise_data(xm, plot = TRUE, filter = \"both\")"},{"path":"http://mlgaynor.com/nQuack/reference/denoise_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Denoise Data — denoise_data","text":"xm Matrix total coverage coverage randomly sampled allele. plot Default TRUE. plots share y-axis, careful interpretation key. Warning, nothing removed, plot removed data missing. filter Indicates method remove data based upon. Options: '', 'nquire', 'nquack'. nQuack utilizes scaled probability data point belonging mixture model, removing sites probability belonging uniform mixture higher probability belonging mixture. 
nQuire utilizes inferred alpha parameter histogram base frequencies filter data.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/denoise_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Denoise Data — denoise_data","text":"Numeric matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta Distribution — emstepB","title":"Expectation maximization - Beta Distribution — emstepB","text":"function calculates log-likelihood using expectation maximization algorithm Nelder-Mead numerical optimization beta distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta Distribution — emstepB","text":"","code":"emstepB(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta Distribution — emstepB","text":"parmlist list containing initial alpha, mean, variance values. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed-2\" (estimated parameter(s): alpha variance), \"fixed-3\" (estimated parameter(s): variance). 
avec length 1, fixed fixed-3 able return log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta Distribution — emstepB","text":"List elements including log likelihood, negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB3.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","title":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","text":"function made Bclean() function preforms expectation maximization Nelder-Mead numerical optimization beta distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB3.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","text":"","code":"emstepB3(parmlist, xi, niter, epsilon, trunc)"},{"path":"http://mlgaynor.com/nQuack/reference/emstepB3.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","text":"parmlist list containing initial alpha, mean, variance. xi Matrix first column total coverage second count base B. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. 
trunc List two values representing lower upper bounds, $c_L$ $c_U$.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepB3.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta + Beta + Beta Distribution — emstepB3","text":"List elements including negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta-Binomial Distribution — emstepBB","title":"Expectation maximization - Beta-Binomial Distribution — emstepBB","text":"function calculates negative log-likelihood using expectation maximization algorithm Nelder-Mead numerical optimization beta-binomial distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta-Binomial Distribution — emstepBB","text":"","code":"emstepBB(parmlist, xm, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta-Binomial Distribution — emstepBB","text":"parmlist list containing initial alpha, mean, variance. xm Matrix first column total coverage second count base B. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. 
type String indicating \"Free\" \"Fixed\".","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta-Binomial Distribution — emstepBB","text":"List elements including negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBBU.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","title":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","text":"function calculates log-likelihood using expectation-maximization algorithm Nelder-Mead numerical optimization beta distribution one uniform mixture.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBBU.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","text":"","code":"emstepBBU(parmlist, xm, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepBBU.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","text":"parmlist list containing initial alpha, mean, variance values. xm Matrix first column total coverage second count base B. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed-2\" (estimated parameter(s): alpha variance), \"fixed-3\" (estimated parameter(s): variance). 
avec length 1, fixed fixed-3 able return log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBBU.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta-Binomial and Uniform Distributions — emstepBBU","text":"List elements including log likelihood, negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBU.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Beta and Uniform Distributions — emstepBU","title":"Expectation maximization - Beta and Uniform Distributions — emstepBU","text":"function calculates log-likelihood using expectation maximization algorithm Nelder-Mead numerical optimization beta distribution one uniform mixture.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBU.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Beta and Uniform Distributions — emstepBU","text":"","code":"emstepBU(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepBU.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Beta and Uniform Distributions — emstepBU","text":"parmlist list containing initial alpha, mean, variance values. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance). 
avec length 1, fixed fixed_3 able return log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepBU.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Beta and Uniform Distributions — emstepBU","text":"List elements including log likelihood, negative log likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepN.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Normal Distribution — emstepN","title":"Expectation maximization - Normal Distribution — emstepN","text":"function calculates log-likelihood using expectation maximization algorithm Normal Distribution. code follows nQuire use augmented likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepN.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Normal Distribution — emstepN","text":"","code":"emstepN(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepN.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Normal Distribution — emstepN","text":"parmlist list containing initial alpha, mean, variance values. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. 
Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepN.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Normal Distribution — emstepN","text":"List elements including log-likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNA.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Normal Distribution — emstepNA","title":"Expectation maximization - Normal Distribution — emstepNA","text":"function calculates log-likelihood using expectation maximization algorithm Normal Distribution. code identical nQuire uses augmented likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNA.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Normal Distribution — emstepNA","text":"","code":"emstepNA(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepNA.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Normal Distribution — emstepNA","text":"parmlist list containing initial alpha, mean, variance values. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. 
Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNA.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Normal Distribution — emstepNA","text":"List elements including log-likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNU.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Normal and Uniform Distribution — emstepNU","title":"Expectation maximization - Normal and Uniform Distribution — emstepNU","text":"function calculates log-likelihood using expectation maximization algorithm Normal-Uniform Distribution. code follows nQuire use augmented likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNU.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Normal and Uniform Distribution — emstepNU","text":"","code":"emstepNU(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepNU.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Normal and Uniform Distribution — emstepNU","text":"parmlist list containing initial alpha, mean, variance values. list alpha must include proportion uniform mixture. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. 
Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNU.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Normal and Uniform Distribution — emstepNU","text":"List elements including log-likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNUA.html","id":null,"dir":"Reference","previous_headings":"","what":"Expectation maximization - Normal Distribution — emstepNUA","title":"Expectation maximization - Normal Distribution — emstepNUA","text":"function calculates log-likelihood using expectation maximization algorithm Normal-Uniform Distribution. code identical nQuire uses augmented likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNUA.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Expectation maximization - Normal Distribution — emstepNUA","text":"","code":"emstepNUA(parmlist, xi, niter, epsilon, trunc, type = \"free\")"},{"path":"http://mlgaynor.com/nQuack/reference/emstepNUA.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Expectation maximization - Normal Distribution — emstepNUA","text":"parmlist list containing initial alpha, mean, variance values. list alpha must include proportion uniform mixture. xi List observations, case allele frequencies. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached. trunc List two values representing lower upper bounds, $c_L$ $c_U$. type String indicating model type. 
Options: \"free\" (estimated parameter(s): alpha, mean, variance), \"fixed\" (estimated parameter(s): alpha), \"fixed_2\" (estimated parameter(s): alpha variance), \"fixed_3\" (estimated parameter(s): variance).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/emstepNUA.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Expectation maximization - Normal Distribution — emstepNUA","text":"List elements including log-likelihood, number iterates, optimized parameter values.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/estepB3.html","id":null,"dir":"Reference","previous_headings":"","what":"E-Step for Expectation Maximization - Beta + Beta + Beta Distribution — estepB3","title":"E-Step for Expectation Maximization - Beta + Beta + Beta Distribution — estepB3","text":"used Bclean() function. complete E-Step calculate log-likelihood. Modifications include correction truncated distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/estepB3.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"E-Step for Expectation Maximization - Beta + Beta + Beta Distribution — estepB3","text":"","code":"estepB3(parmlist, xi, trunc)"},{"path":"http://mlgaynor.com/nQuack/reference/estepB3.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"E-Step for Expectation Maximization - Beta + Beta + Beta Distribution — estepB3","text":"parmlist list containing initial alpha, mean, variance. xi List observations, case allele frequencies. 
trunc List two values representing lower upper bounds, $c_L$ $c_U$.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidBICcalc.html","id":null,"dir":"Reference","previous_headings":"","what":"BIC Calculations - Haploid — haploidBICcalc","title":"BIC Calculations - Haploid — haploidBICcalc","text":"function used model selection calculate BIC haploid model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidBICcalc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"BIC Calculations - Haploid — haploidBICcalc","text":"","code":"haploidBICcalc(xi, trunc)"},{"path":"http://mlgaynor.com/nQuack/reference/haploidBICcalc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"BIC Calculations - Haploid — haploidBICcalc","text":"xi Vector allele frequencies. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. @returns BIC score Haploid model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidoptim.html","id":null,"dir":"Reference","previous_headings":"","what":"Haploid log-likelihood calculation — haploidoptim","title":"Haploid log-likelihood calculation — haploidoptim","text":"Haploid log-likelihood calculation","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidoptim.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Haploid log-likelihood calculation — haploidoptim","text":"","code":"haploidoptim(xi)"},{"path":"http://mlgaynor.com/nQuack/reference/haploidoptim.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Haploid log-likelihood calculation — haploidoptim","text":"xi List allele frequencies.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/haploidoptim.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Haploid log-likelihood calculation — 
haploidoptim","text":"Log-likelihood given shape scale (alpha beta) less one.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","title":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","text":"Bootstrap Model Selection - Helper Function - Beta","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","text":"","code":"mmselectB(pset, xi, trunc, niter = 1000L, epsilon = 0.1)"},{"path":"http://mlgaynor.com/nQuack/reference/mmselectB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","text":"pset list 5 sets model parameters, list alpha, mean, variance. xi List observations, case allele frequencies. trunc List two values representing lower upper bounds, $c_L$ $c_U$. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap Model Selection - Helper Function - Beta — mmselectB","text":"Summary statistics including Log-likelihood BIC score, well model checks. model checks include abcheck, verifies fit parameters suggest U-shaped distribution. vector \"converge\" also returned, contains number iterates convergence. 
number iterates surpasses niters parameter, model converge.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","title":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","text":"Bootstrap Model Selection - Helper Function - Beta-Binomial","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","text":"","code":"mmselectBB(pset, xm, trunc, niter = 1000L, epsilon = 0.1)"},{"path":"http://mlgaynor.com/nQuack/reference/mmselectBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","text":"pset list 5 sets model parameters, list alpha, mean, variance. xm Matrix first column total coverage second count base B. trunc List two values representing lower upper bounds, $c_L$ $c_U$. niter Max number iterates. epsilon Epsilon value convergence tolerance. absolute delta log-likelihood value, convergence reached.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/mmselectBB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap Model Selection - Helper Function - Beta-Binomial — mmselectBB","text":"Summary statistics including Log-likelihood BIC score, well model checks. model checks include abcheck, verifies fit parameters suggest U-shaped distribution. vector \"converge\" also returned, contains number iterates convergence. 
number iterates surpasses niters parameter, model converge.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/muvarcalcvec.html","id":null,"dir":"Reference","previous_headings":"","what":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","title":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","text":"function used calculate variance.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/muvarcalcvec.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","text":"","code":"muvarcalcvec(mu, tau, error)"},{"path":"http://mlgaynor.com/nQuack/reference/muvarcalcvec.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","text":"mu Vector means. tau Sequence overdispersion parameter read counts. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/muvarcalcvec.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Variance calculation from Mean, Tau, and Sequencing Error — muvarcalcvec","text":"Mean variance associated tau error.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/nQuire_reformat.html","id":null,"dir":"Reference","previous_headings":"","what":"Data Preparation - Use nQuire's Data — nQuire_reformat","title":"Data Preparation - Use nQuire's Data — nQuire_reformat","text":"function reduce three column data frame two columns randomly sampling allele B every site. 
used function process_nquire()","code":""},{"path":"http://mlgaynor.com/nQuack/reference/nQuire_reformat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Data Preparation - Use nQuire's Data — nQuire_reformat","text":"","code":"nQuire_reformat(xm)"},{"path":"http://mlgaynor.com/nQuack/reference/nQuire_reformat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Data Preparation - Use nQuire's Data — nQuire_reformat","text":"xm matrix three columns: Total Coverage, Counts Allele , Counts Allele B.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/nQuire_reformat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Data Preparation - Use nQuire's Data — nQuire_reformat","text":"Numeric Matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Prepare data - Step 1 — prepare_data","title":"Prepare data - Step 1 — prepare_data","text":"function transforms BAM file text file. Specifically, function uses samtools mpileup translate BAM tab-separated file. filter file remove indels deletions. running function, temporary folder created (named 'temp/'), however folder removed process complete.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prepare data - Step 1 — prepare_data","text":"","code":"prepare_data(name, inpath, outpath)"},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prepare data - Step 1 — prepare_data","text":"name File name without suffix. example, file called \"frog.bam\", input \"frog\". inpath Location input file. 
outpath Location output file.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prepare data - Step 1 — prepare_data","text":"Writes text file following columns: chromosome, position, depth, , C, G, T.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/prepare_data.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Prepare data - Step 1 — prepare_data","text":"Warning, due processing time needed samtools mpileup, step may take time. function also requires samtools located locally. Please see Data Preparation article information. Warning, writes temporary folder titled 'temp'. want run multiple samples , suggest set working directory separate locations ensure temp folder/files overwritten.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Process data - Step 2 — process_data","title":"Process data - Step 2 — process_data","text":"Based file generated prepare_data(), contains total depth sequencing coverage nucleotide (, C, G, T), function remove single nucelotide polymorphisms. supplied, function filter coverage allele frequency. filter total coverage, user must supply min.depth max.depth.quantile.prob. error provided, sites retained allele coverage greater sequencing error rate times total coverage, less one minus sequencing error rate times total coverage. Lastly, based trunc, allele frequencies filtered based provided lower upper bound. 
Finally, function samples single allele frequency per site avoid data duplication.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Process data - Step 2 — process_data","text":"","code":"process_data( file, min.depth = 2, max.depth.quantile.prob = 0.9, error = 0.01, trunc = c(0, 0) )"},{"path":"http://mlgaynor.com/nQuack/reference/process_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Process data - Step 2 — process_data","text":"file Output txt file created prepare_data(). min.depth Minimum sequencing depth, default 2. max.depth.quantile.prob Maximum sequencing depth quantile cut , default = 0.9. error Sequencing error rate. error provided, sites retained allele coverage greater sequencing error rate times total coverage, less one minus sequencing error rate times total coverage. trunc List two values representing lower upper bounds, \\(c_{L}\\) \\(c_{U}\\) used filter allele frequencies.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Process data - Step 2 — process_data","text":"Numeric matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_nquire.html","id":null,"dir":"Reference","previous_headings":"","what":"Use nQuire's Data — process_nquire","title":"Use nQuire's Data — process_nquire","text":"happen like nQuire's data preparation , uses data program. processing samples nQuire's create view functions, resulting text file can read R. 
prepare data frame nQuack, reduce three column data frame two columns randomly sampling allele B every site.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_nquire.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Use nQuire's Data — process_nquire","text":"","code":"process_nquire(file)"},{"path":"http://mlgaynor.com/nQuack/reference/process_nquire.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Use nQuire's Data — process_nquire","text":"file Output text file created nQuire.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_nquire.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Use nQuire's Data — process_nquire","text":"Numeric matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_rcpp.html","id":null,"dir":"Reference","previous_headings":"","what":"Data Preparation - Matrix Filtering — process_rcpp","title":"Data Preparation - Matrix Filtering — process_rcpp","text":"Based supplied matrix total depth sequencing coverage nucleotide (, C, G, T) function remove single nucelotide polymorphisms. supplied, function filter coverage allele frequency. Finally, function samples single allele frequency per site avoid data duplication.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_rcpp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Data Preparation - Matrix Filtering — process_rcpp","text":"","code":"process_rcpp(x, mindepth, maxprob, trunc, error)"},{"path":"http://mlgaynor.com/nQuack/reference/process_rcpp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Data Preparation - Matrix Filtering — process_rcpp","text":"x Matrix five columns: Depth, , C, G, T. mindepth Minimum depth, default = 15. maxprob Maximum depth quantile cut , default = 0.9. 
trunc List two values representing lower upper bounds, $c_L$ $c_U$. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/process_rcpp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Data Preparation - Matrix Filtering — process_rcpp","text":"Numeric Matrix total coverage coverage randomly sampled allele.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackB.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta Distribution — quackB","title":"Mixture Model - Expected Maximization - Beta Distribution — quackB","text":"function used expected maximization Beta Mixture Model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta Distribution — quackB","text":"","code":"quackB(xm, trunc = c(0, 0), cores = NA)"},{"path":"http://mlgaynor.com/nQuack/reference/quackB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta Distribution — quackB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. 
cores Number cores use parallel processing.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBB.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta-Binomial Distribution — quackBB","title":"Mixture Model - Expected Maximization - Beta-Binomial Distribution — quackBB","text":"function wrapper expected maximization Beta Mixture Model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta-Binomial Distribution — quackBB","text":"","code":"quackBB(xm, trunc = c(0, 0), cores = NA, tau = NULL, error = NULL)"},{"path":"http://mlgaynor.com/nQuack/reference/quackBB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta-Binomial Distribution — quackBB","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequence overdispersion parameter read counts. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","title":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","text":"function wrapper expected maximization Beta-Binomial Mixture Model. 
function runs free model mixtures.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","text":"","code":"quackBBM(xm, trunc = c(0, 0), cores = NA, tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackBBM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Beta-Binomial Mixture — quackBBM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBUM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","title":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","text":"function wrapper expected maximization Beta-Binomial-Uniform Mixture Model. function runs free model mixtures. 
uniform mixture starting proportion 0.1.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBUM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","text":"","code":"quackBBUM(xm, trunc = c(0, 0), cores = NA, tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackBBUM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBBUM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Beta-Binomial-Uniform Mixture — quackBBUM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","title":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","text":"function used expected maximization Beta Mixture Model. 
function runs free model mixtures.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","text":"","code":"quackBM(xm, trunc = c(0, 0), cores = NA, tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackBM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. allele frequency truncation done remove error, need truncate expected, observed. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Beta Distribution — quackBM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBUM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","title":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","text":"function used expected maximization Beta-Uniform Mixture Model. 
function runs free model mixtures.uniform mixture starting proportion 0.1.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBUM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","text":"","code":"quackBUM(xm, trunc = c(0, 0), cores = NA, tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackBUM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. allele frequency truncation done remove error, need truncate expected, observed. truncation done, set c(0,0), default. cores Number cores use parallel processing. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBUM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Beta-Uniform Mixture — quackBUM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. 
, smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBeta.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","title":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","text":"function uses expectation maximization beta beta-uniform mixture models model selection. can run 32 mixture models.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBeta.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","text":"","code":"quackBeta( xm, samplename, cores, parallel = FALSE, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA, free = FALSE )"},{"path":"http://mlgaynor.com/nQuack/reference/quackBeta.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","text":"xm Matrix two columns total coverage coverage randomly sampled allele. samplename Name sample included output. cores Threads available run process parallel. parallel default = FALSE, set true cores > 1. trunc List two values representing lower upper bounds allele frequency truncation , \\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. 
free default = FALSE, skip free model calculation calculate delta log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBeta.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Beta Mixture — quackBeta","text":"BIC scores log-likelihood (LL) mixture models including diploid, triploid, tetraploid, pentaploid, hexaploid. free = TRUE, delta log-likelihood (dLL) calculated based associated free model (without uniform mixture). BIC delta-log likelihood, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBetaBinom.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","title":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","text":"function uses expectation maximization beta-binomial beta-binomial-uniform mixture models model selection. can run 32 mixture models.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBetaBinom.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","text":"","code":"quackBetaBinom( xm, samplename, cores, parallel = FALSE, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA, free = FALSE )"},{"path":"http://mlgaynor.com/nQuack/reference/quackBetaBinom.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","text":"xm Matrix two columns total coverage coverage randomly sampled allele. samplename Name sample included output. cores Threads available run process parallel. parallel default = FALSE, set true cores > 1. 
trunc List two values representing lower upper bounds allele frequency truncation , \\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. free default = FALSE, skip free model calculation calculate delta log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackBetaBinom.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Beta-Binomial Mixture — quackBetaBinom","text":"BIC scores log-likelihood (LL) mixture models including diploid, triploid, tetraploid, pentaploid, hexaploid. free = TRUE, delta log-likelihood (dLL) calculated based associated free model (without uniform mixture). BIC delta-log likelihood, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","title":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","text":"function used expected maximization Normal Mixture Model. 
function runs free model mixtures.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","text":"","code":"quackNM(xm, trunc = c(0, 0), tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackNM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. allele frequency truncation done remove error, need truncate expected, observed. truncation done, set c(0,0), default. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Normal Distribution — quackNM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNUM.html","id":null,"dir":"Reference","previous_headings":"","what":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","title":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","text":"function used expected maximization Normal-Uniform Mixture Model. 
function runs free model mixtures.uniform mixture starting proportion 0.1.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNUM.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","text":"","code":"quackNUM(xm, trunc = c(0, 0), tau = NA, error = NA, return = \"LL\")"},{"path":"http://mlgaynor.com/nQuack/reference/quackNUM.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","text":"xm Matrix two columns total coverage coverage randomly sampled allele. trunc List two values representing lower upper bounds allele frequency truncation , $c_L$ $c_U$. allele frequency truncation done remove error, need truncate expected, observed. truncation done, set c(0,0), default. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01. return Indicates values return. Options: 'LL' (log-likelihood) 'BIC' (bayesian information criterion) ''.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNUM.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mixture Model - Expected Maximization - Normal-Uniform Mixture — quackNUM","text":"Log-likelihood /BIC scores free models including diploid, triploid, tetraploid, pentaploid, hexaploid. 
, smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNboots.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","title":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","text":"function made assist bootstrap replication set models run subset models based selected distribution type. many limitations function make tractable, 128 models run package. include models comparisons found unhelpful, includes nQuire implementation log-likelihood ratio tests.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNboots.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","text":"","code":"quackNboots( xm, nboots = 100, distribution, type, uniform, mixtures = c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\"), samplename, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA )"},{"path":"http://mlgaynor.com/nQuack/reference/quackNboots.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","text":"xm Matrix two columns total coverage coverage randomly sampled allele. nboots Number bootstrap replicates examine. distribution May set normal, beta, beta-binomial. include implementation nQuire. type May equal fixed, fixed_2, fixed_3. uniform equal 1, uniform mixture included. equal 0, uniform mixture included. mixtures Defaults c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\"). samplename Name sample included output. trunc List two values representing lower upper bounds allele frequency truncation ,\\(c_{L}\\) \\(c_{U}\\). 
allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNboots.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrapping - Expectation Maximization - Choose your distribution and type — quackNboots","text":"BIC scores log-likelihood (LL) included mixture models. , smallest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormal.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","title":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","text":"function uses expectation maximization normal normal-uniform mixture models model selection. can run 32 mixture models.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormal.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","text":"","code":"quackNormal( xm, samplename, cores, parallel = FALSE, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA, free = FALSE )"},{"path":"http://mlgaynor.com/nQuack/reference/quackNormal.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","text":"xm Matrix two columns total coverage coverage randomly sampled allele. samplename Name sample included output. cores Threads available run process parallel. 
parallel default = FALSE, set true cores > 1. trunc List two values representing lower upper bounds allele frequency truncation,\\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. free default = FALSE, skip free model calculation calculate delta log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormal.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Normal Mixture — quackNormal","text":"BIC scores log-likelihood (LL) mixture models including diploid, triploid, tetraploid, pentaploid, hexaploid. free = TRUE, delta log-likelihood (dLL) calculated based associated free model (without uniform mixture). BIC delta-log likelihood, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormalNQ.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","title":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","text":"function uses expectation maximization normal normal-uniform mixture models model selection based nQuire approach. 
can run 32 mixture models.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormalNQ.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","text":"","code":"quackNormalNQ( xm, samplename, cores, parallel = FALSE, trunc = c(0, 0), lowvar = FALSE, tau = NA, error = NA, free = FALSE )"},{"path":"http://mlgaynor.com/nQuack/reference/quackNormalNQ.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","text":"xm Matrix two columns total coverage coverage randomly sampled allele. samplename Name sample included output. cores Threads available run process parallel. parallel default = FALSE, set true cores > 1. trunc List two values representing lower upper bounds allele frequency truncation , \\(c_{L}\\) \\(c_{U}\\). allele frequency truncation done remove error, need truncate expected. truncation done, set c(0,0), default. lowvar Default FALSE. false, variance equal 0.01. set TRUE tau error provided, variance set 0.001. tau Sequencing overdispersion parameter. tau error provided, variance mixture inferred values. , variance default equal 0.01 0.001. error Sequencing error rate. tau error provided, variance mixture inferred values. free default = FALSE, skip free model calculation calculate delta log-likelihood.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackNormalNQ.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Expectation Maximization - Normal Mixture (nQuire) — quackNormalNQ","text":"BIC scores log-likelihood (LL) mixture models including diploid, triploid, tetraploid, pentaploid, hexaploid. free = TRUE, delta log-likelihood (dLL) calculated based associated free model (without uniform mixture). 
BIC delta-log likelihood, smallest score likely model. LL, largest score likely model.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackit.html","id":null,"dir":"Reference","previous_headings":"","what":"Model Selection - Based on BIC or Log-Likelihood — quackit","title":"Model Selection - Based on BIC or Log-Likelihood — quackit","text":"function model interpretation.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model Selection - Based on BIC or Log-Likelihood — quackit","text":"","code":"quackit( model_out, summary_statistic = \"BIC\", mixtures = c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\") )"},{"path":"http://mlgaynor.com/nQuack/reference/quackit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model Selection - Based on BIC or Log-Likelihood — quackit","text":"model_out Data frame containing, minimum, columns labeled LL, type, mixture, distribution, BIC. summary_statistic May equal BIC dLL. mixtures Defaults c(\"diploid\", \"triploid\", \"tetraploid\", \"hexaploid\", \"pentaploid\").","code":""},{"path":"http://mlgaynor.com/nQuack/reference/quackit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model Selection - Based on BIC or Log-Likelihood — quackit","text":"Returns data frame likely model set mixtures. 
Includes best second best mixtures, well difference two.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/resample_xm.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate Alpha and Beta from Mean and Variance — resample_xm","title":"Calculate Alpha and Beta from Mean and Variance — resample_xm","text":"Calculate Alpha Beta Mean Variance","code":""},{"path":"http://mlgaynor.com/nQuack/reference/resample_xm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate Alpha and Beta from Mean and Variance — resample_xm","text":"","code":"resample_xm(xm, n)"},{"path":"http://mlgaynor.com/nQuack/reference/resample_xm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate Alpha and Beta from Mean and Variance — resample_xm","text":"xm Matrix total coverage coverage randomly sampled allele. n Length matrix.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/resample_xm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate Alpha and Beta from Mean and Variance — resample_xm","text":"Randomly sampled matrix.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/setconvert.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate Variance from Mean, Tau, and Sequencing Error — setconvert","title":"Calculate Variance from Mean, Tau, and Sequencing Error — setconvert","text":"function used replace variance mixture model sets.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/setconvert.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate Variance from Mean, Tau, and Sequencing Error — setconvert","text":"","code":"setconvert(set, tau, error)"},{"path":"http://mlgaynor.com/nQuack/reference/setconvert.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate Variance from Mean, Tau, and Sequencing Error 
— setconvert","text":"set list lists, lists must contain avec, mvec, svec. tau Sequence overdispersion parameter read counts. error Sequencing error rate.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/setconvert.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate Variance from Mean, Tau, and Sequencing Error — setconvert","text":"Mean variance associated tau error.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.html","id":null,"dir":"Reference","previous_headings":"","what":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","text":"function used simulate coverage allele biallelic heterozygous sites assuming beta binomial distribution. sample sequence depth truncated poisson distribution set minimum, maximum, lambda. heterozygous sites returned. Based input variables, sites may filtered based total coverage (filter.coverage), allele sequencing coverage (filter.error), allele frequency (filter.freq).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","text":"","code":"sim.ind.BB( mvec, avec, svec, error = 0.001, s.size = 50000, lambda = 11, max.coverage = 20, min.coverage = 2, filter.coverage = TRUE, max.depth.quantile.prob = 0.9, filter.error = TRUE, filter.freq = FALSE, trunc = c(0, 0), sampled = TRUE )"},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","text":"mvec Vector mean values allele frequency. avec Vector alpha values representing proportion expected mean. 
svec Vector variance values. error Sequencing error rate. Default 0.001, low error. s.size Number biallelic sites generate. Defaults 50000. Warning, number sites generated number sites returned due filtering steps. lambda Set lambda truncated poisson distrubtion. Defaults 11. max.coverage Maximum sequencing depth per site. Defaults 20. min.coverage Minimum sequencing depth per site. Defaults 2. filter.coverage Default TRUE. Filters retain sites total sequencing depth greater provided minimum coverage less max quantile depth (set max.depth.quantile.prob). max.depth.quantile.prob Maximum depth quantile probability. Defaults 0.9. filter.error Default TRUE. Filter retain sites allele coverage greater sequencing error rate times total coverage, less one minus sequencing error rate times total coverage. filter.freq Default FALSE. set true, sites filtered based provided trunc. trunc List two values representing lower upper bounds,\\(c_{L}\\) \\(c_{U}\\). Defaults c(0,0) represent truncation. sampled Default TRUE. randomly sample allele allele B, return data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution — sim.ind.BB","text":"sampled = FALSE, data frame total coverage, coverage allele , coverage allele B returned. 
sampled = TRUE, data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.tau.html","id":null,"dir":"Reference","previous_headings":"","what":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","text":"function used simulate frequency biallelic heterozygous sites assuming beta-binomial distribution. sample sequence depth truncated poisson distribution set minimum, maximum, lambda. heterozygous sites returned. Based input variables, sites may filtered based total coverage (filter.coverage), allele sequencing coverage (filter.error), allele frequency (filter.freq).","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.tau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","text":"","code":"sim.ind.BB.tau( mvec, avec, tau = 0.01, error = 0.001, s.size = 50000, lambda = 11, max.coverage = 20, min.coverage = 2, filter.coverage = TRUE, max.depth.quantile.prob = 0.9, filter.error = TRUE, filter.freq = FALSE, trunc = c(0, 0), sampled = TRUE )"},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.tau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","text":"mvec Vector mean values allele frequency. avec Vector alpha values representing proportion expected mean. tau Overdispersion parameter. Defaults 0.01. error Sequencing error rate. Defaults 0.001. s.size Number biallelic sites generate. Defaults 50000. 
Warning, number sites generated number sites returned due filtering steps. lambda Set lambda truncated poisson distrubtion. Defaults 11. max.coverage Maximum sequencing depth per site. Defaults 20. min.coverage Minimum sequencing depth per site. Defaults 2. filter.coverage Default TRUE. Filters retain sites total sequencing depth greater provided minimum coverage less max quantile depth (set max.depth.quantile.prob). max.depth.quantile.prob Maximum depth quantile probability. Defaults 0.9. filter.error Default TRUE. Filter retain sites allele coverage greater sequencing error rate times total coverage, less one minus sequencing error rate times total coverage. filter.freq Default FALSE. set true, sites filtered based provided trunc. trunc List two values representing lower upper bounds, \\(c_{L}\\) \\(c_{U}\\). Defaults c(0,0) represent truncation. sampled Default TRUE. randomly sample allele allele B, return data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.BB.tau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simulate Allele Counts for Single Individual - Beta-Binomial Distribution with Overdispersion and Error — sim.ind.BB.tau","text":"sampled = FALSE, data frame total coverage, coverage allele , coverage allele B returned. 
sampled = TRUE, data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.simple.html","id":null,"dir":"Reference","previous_headings":"","what":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","title":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","text":"function used simulate coverage allele biallelic heterozygous sites assuming binomial distribution.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.simple.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","text":"","code":"sim.ind.simple(mvec, cover = 100, s.size = 50000, sampled = TRUE)"},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.simple.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","text":"mvec Vector means. cover Coverage sites. s.size Number biallelic sites generate. Defaults 50000. Warning, number sites generated number sites returned due filtering steps. sampled Default TRUE. randomly sample allele allele B, return data frame total coverage coverage randomly sampled allele returned.","code":""},{"path":"http://mlgaynor.com/nQuack/reference/sim.ind.simple.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Simulate Allele Counts for Single Individual - Simple Approach — sim.ind.simple","text":"sampled = FALSE, data frame total coverage, coverage allele , coverage allele B returned. 
sampled = TRUE, data frame total coverage coverage randomly sampled allele returned.","code":""}] diff --git a/inst/CITATION b/inst/CITATION index ebf7a70..1be06cd 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,9 +1,11 @@ bibentry(bibtype = "Article", title = "nQuack: An R package for predicting ploidal level from sequence data using site-based heterozygosity", year = "2024", - journal = "bioRxiv", - publisher = "Cold Spring Harbor Laboratory", - doi = "10.1101/2024.02.12.579894", + journal = "Applications in Plant Sciences", + doi = "10.1002/aps3.11606", + pages = "e11606", + volume = "12", + number = "4", author = c(person(given = "Michelle L.", family = "Gaynor", email = "shellyleegaynor@gmail.com"),