diff --git a/NAMESPACE b/NAMESPACE index e67a690..2dcd5a3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,27 +1,44 @@ # Generated by roxygen2: do not edit by hand S3method(plot,kalisDistanceMatrix) +S3method(plot,kalisIterator) S3method(print,kalisBackwardTable) +S3method(print,kalisCheckpointTable) S3method(print,kalisForwardTable) +S3method(print,kalisIterator) S3method(print,kalisParameters) +S3method(targets,kalisIterator) export(Backward) export(CacheHaplotypes) export(CacheSummary) export(CalcRho) +export(CalcTraces) +export(CladeMat) +export(CladeMat_old) +export(Clades) export(ClearHaplotypeCache) export(CopyTable) +export(CreateForwardTableCache) export(DistMat) +export(FillTableCache) export(Forward) +export(ForwardIterator) +export(ForwardUsingTableCache) export(L) export(MakeBackwardTable) export(MakeForwardTable) export(N) +export(Neighbors) export(Parameters) export(PostProbs) +export(PruneCladeMat) export(QueryCache) export(ReadHaplotypes) export(ResetTable) +export(Sprigs) +export(Sprigs_old) export(WriteHaplotypes) +export(calc_tables) import(checkmate) import(dplyr) importFrom(digest,digest) diff --git a/R/Iterator.R b/R/Iterator.R index e28a095..51cb1d7 100644 --- a/R/Iterator.R +++ b/R/Iterator.R @@ -274,6 +274,7 @@ targets <- function(x) { # put this declaration above and below because it seems UseMethod("targets") } +#' @export targets.kalisIterator <- function(iter){ if(!"kalisIterator"%in%class(iter)){stop("argument must be a kalisIterator")} rev(get("targets", envir = environment(iter))) @@ -283,6 +284,7 @@ targets <- function(x) { UseMethod("targets") } +#' @export print.kalisIterator <- function(iter){ if(!"kalisIterator"%in%class(iter)){stop("argument must be a kalisIterator")} @@ -297,6 +299,7 @@ print.kalisIterator <- function(iter){ } } +#' @export plot.kalisIterator <- function(iter){ if(!"kalisIterator"%in%class(iter)){stop("argument must be a kalisIterator")} sch <- get("sch",envir = environment(iter)) diff --git a/man/CladeMat.Rd 
b/man/CladeMat.Rd new file mode 100644 index 0000000..7eed863 --- /dev/null +++ b/man/CladeMat.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CladeMat.R +\name{CladeMat} +\alias{CladeMat} +\title{Fast Clade Matrix Construction} +\usage{ +CladeMat( + fwd, + bck, + M, + unit.dist, + thresh = 0.2, + max1var = FALSE, + nthreads = min(parallel::detectCores(logical = FALSE), fwd$to_recipient - + fwd$from_recipient + 1) +) +} +\description{ +Fast Clade Matrix Construction +} diff --git a/man/CladeMat_old.Rd b/man/CladeMat_old.Rd new file mode 100644 index 0000000..aebb53c --- /dev/null +++ b/man/CladeMat_old.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Clades.R +\name{CladeMat_old} +\alias{CladeMat_old} +\title{CladeMat OLD} +\usage{ +CladeMat_old( + x, + ploidy = 2L, + sprigs.to.prune = NULL, + assemble = TRUE, + use.forking = FALSE, + forking.chunk.size = 100L, + mc.preschedule = FALSE, + nthreads = 1L +) +} +\arguments{ +\item{x}{a \code{kalisClades} object returned by \code{\link{kalisClades}}} + +\item{ploidy}{an integer, the ploidy of the organism} + +\item{sprigs.to.prune}{a \code{kalisSprigs} object returned by \code{\link{kalisSprigs}} encoding sprigs that should be excluded from the matrix returned} + +\item{assemble}{a logical, if FALSE return the clade matrix as a list of columns rather than as a symmetrized matrix} + +\item{use.forking}{a logical, should forked processes be used?} + +\item{nthreads}{the number of CPU cores to use. 
Currently, no parallelism is used.} +} +\value{ +a matrix representation of the probabilistic clades provided +} +\description{ +Utility for constructing a probabilistic clade matrix +} diff --git a/man/Clades.Rd b/man/Clades.Rd new file mode 100644 index 0000000..cae541b --- /dev/null +++ b/man/Clades.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Clades.R +\name{Clades} +\alias{Clades} +\title{Probabilistic Clades} +\usage{ +Clades( + fwd, + bck, + pars, + beta.theta.opts = NULL, + safety.checks = FALSE, + neighbors = FALSE, + use.forking = FALSE, + forking.chunk.size = 100L, + mc.preschedule = FALSE, + nthreads = 1L +) +} +\arguments{ +\item{fwd}{a forward table as returned by \code{\link{MakeForwardTable}}} + +\item{bck}{a backward table as returned by \code{\link{MakeBackwardTable}}} + +\item{pars}{a \code{kalisParameters} object, as returned by +\code{\link{Parameters}}.} + +\item{beta.theta.opts}{a list; see Details for \code{\link{DistMat}}.} + +\item{safety.checks}{a logical, should safety checks be applied to the distances? See \code{\link{DistMat}}.} + +\item{neighbors}{a logical, should nearest neighbors be pre-calculated? See \code{\link{Neighbors}}.} + +\item{use.forking}{a logical, should forked processes be used?} + +\item{nthreads}{the number of CPU cores to use. Currently, no parallelism is used.} +} +\value{ +a \code{kalisClades} object encoding probabilistic clade calls +} +\description{ +Utility for calling probabilistic clades at, in between, or excluding variants. 
+} diff --git a/man/CreateForwardTableCache.Rd b/man/CreateForwardTableCache.Rd new file mode 100644 index 0000000..ff224ed --- /dev/null +++ b/man/CreateForwardTableCache.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TableCache.R +\name{CreateForwardTableCache} +\alias{CreateForwardTableCache} +\title{Create cache for forward tables} +\usage{ +CreateForwardTableCache( + pars, + size = 1, + from_recipient = 1, + to_recipient = Inf, + max.tables = 0 +) +} +\arguments{ +\item{pars}{a \code{kalisParameters} object, as returned by +\code{Parameters}.} + +\item{size}{the maximum amount of RAM (in GB) to devote to this cache.} + +\item{from_recipient}{first recipient haplotype if creating a partial forward +table cache. By default includes from the first recipient haplotype.} + +\item{to_recipient}{last recipient haplotype if creating a partial forward +table cache. By default includes to the last recipient haplotype.} +} +\value{ +A list of forward tables representing a cache and ready to be filled is +returned. +} +\description{ +Create an in-memory cache for forward tables to improve efficiency when +iterating in reverse along the haplotype sequences. +} +\details{ +If the objective is to run the Li and Stephens hidden Markov model both +forwards and backwards to the same locus (and to do so for every possible +locus), then considerable efficiency can be achieved by first performing a +full scan forwards, filling a geometrically spaced cache whilst doing so. +Then, by working backwards, the backward propagation moves one locus at a +time and the forward propagation can move backwards by moving forward from a +recently cached local table. + +Memory for a cache can be allocated using this function and should then be +filled with \code{\link{FillTableCache}}. +To use the cache, then instead of using the \code{\link{Forward}} function, +use \code{\link{ForwardUsingTableCache}}. 
+} +\examples{ +\dontrun{ +# This code assumes you have already: +# i) cached the haplotypes using CacheHaplotypes function +# ii) setup parameters in a variable called pars +# iii) set the number of loci in a variable called L + +# Allocate up to 10GB to a cache, with parameters already setup in pars ... +cache <- CreateForwardTableCache(pars, 10) +# ... and fill it +FillTableCache(cache, pars, nthreads = 8) + +# Create forward and backward tables +fwd <- MakeForwardTable(pars) +bck <- MakeBackwardTable(pars) + +# Then reach every locus faster by iterating backwards, using the cache to +# move the forward table into position faster +for(l in L:1) { + Backward(bck, pars, l, nthreads = 8) + ForwardUsingTableCache(fwd, pars, cache, l, nthreads = 8) + # Do whatever work is required at + # every locus here using fwd and bck +} +} + +} +\seealso{ +\code{\link{MakeForwardTable}} to make a forward table; +\code{\link{FillTableCache}} to fill a cache; +\code{\link{ForwardUsingTableCache}} to use a cache; +\code{\link{Forward}} for forward function without using a cache. +} diff --git a/man/FillTableCache.Rd b/man/FillTableCache.Rd new file mode 100644 index 0000000..b1fc2f8 --- /dev/null +++ b/man/FillTableCache.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TableCache.R +\name{FillTableCache} +\alias{FillTableCache} +\title{Fill a forward table cache} +\usage{ +FillTableCache( + cache, + pars, + nthreads = min(parallel::detectCores(logical = FALSE), cache[[1]]$to_recipient - + cache[[1]]$from_recipient + 1) +) +} +\arguments{ +\item{cache}{a cache of forward tables as generated by +\code{\link{CreateForwardTableCache}}} + +\item{pars}{a \code{kalisParameters} object, as returned by +\code{Parameters}.} + +\item{nthreads}{the number of CPU cores to use. +By default, the smaller of the number of physical CPU cores and the number of recipient haplotypes in the cache.} + +\item{from}{the first locus which the geometrically spaced cache should be +built from. 
+By default the whole sequence length will be cached so this defaults to 1.} + +\item{to}{the last locus up to which the geometrically spaced cache should be +built. +By default the whole sequence length will be cached so this defaults to +\code{Inf}.} +} +\value{ +There is nothing returned. +For performance reasons, \code{cache} is updated in-place. +} +\description{ +An in-memory cache for forward tables can be filled using this function, for +either the whole sequence length or some sub-sequence. +} +\details{ +If the objective is to run the Li and Stephens hidden Markov model both +forwards and backwards to the same locus (and to do so for every possible +locus), then considerable efficiency can be achieved by first performing a +full scan forwards, filling a geometrically spaced cache whilst doing so. +Then, by working backwards, the backward propagation moves one locus at a +time and the forward propagation can move backwards by moving forward from a +recently cached local table. + +Memory for a cache can be allocated using +\code{\link{CreateForwardTableCache}} and should then be filled with this +function. +To use the cache, then instead of using the \code{\link{Forward}} function, +use \code{\link{ForwardUsingTableCache}}. +} +\examples{ +\dontrun{ +# This code assumes you have already: +# i) cached the haplotypes using CacheHaplotypes function +# ii) setup parameters in a variable called pars +# iii) set the number of loci in a variable called L + +# Allocate up to 10GB to a cache, with parameters already setup in pars ... +cache <- CreateForwardTableCache(pars, 10) +# ... 
and fill it +FillTableCache(cache, pars, nthreads = 8) + +# Create forward and backward tables +fwd <- MakeForwardTable(pars) +bck <- MakeBackwardTable(pars) + +# Then reach every locus faster by iterating backwards, using the cache to +# move the forward table into position faster +for(l in L:1) { + Backward(bck, pars, l, nthreads = 8) + ForwardUsingTableCache(fwd, pars, cache, l, nthreads = 8) + # Do whatever work is required at + # every locus here using fwd and bck +} +} + +} +\seealso{ +\code{\link{MakeForwardTable}} to make a forward table; +\code{\link{CreateForwardTableCache}} to generate a cache; +\code{\link{ForwardUsingTableCache}} to use a cache; +\code{\link{Forward}} for forward function without using a cache. +} diff --git a/man/ForwardIterator.Rd b/man/ForwardIterator.Rd new file mode 100644 index 0000000..668137f --- /dev/null +++ b/man/ForwardIterator.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Iterator.R +\name{ForwardIterator} +\alias{ForwardIterator} +\title{Build an efficient iterator over loci} +\usage{ +ForwardIterator( + pars, + ram.ckpts = 1L, + targets = 1:kalis:::pkgVars$L, + base.fwd.table = NULL, + disk.ckpts = 0, + disk.dir = NULL, + from_recipient = 1, + to_recipient = Inf, + lookup.tables = NULL, + cache = NULL, + save.cache = FALSE, + exact = TRUE, + force.unif = FALSE +) +} +\arguments{ +\item{ram.ckpts}{an integer specifying the number of checkpoints to store in RAM} + +\item{targets}{a vector of loci to iterate over (starting with the most downstream target)} + +\item{base.fwd.table}{a \code{kalisForwardTable} either at the most upstream target, or if the targets are evenly spaced, one interval upstream of the most upstream target. 
+NULL (the default) is interpreted as the prior \code{Pi}, see \code{\link{Parameters}}} + +\item{disk.ckpts}{an integer specifying the number of checkpoints to store on disk} + +\item{disk.dir}{a path to a directory where a temporary folder may be made to store checkpoints on disk} + +\item{force.unif}{a logical, if TRUE iterate over targets as if they were uniformly spaced. WARNING: DO NOT use this in conjunction with the targets method, still experimental. With force.unif = TRUE, the resulting iterator will appear to be targeting the first length(targets) variants with all methods, but in fact will be silently iterating over the original targets.} +} +\description{ +Create a \code{kalisForwardIterator} for propagating a forward table iteratively over target loci using a table cache and optimal checkpointing. +} +\details{ +See example. +} +\examples{ +\dontrun{ +data("SmallHaps") +CacheHaplotypes(SmallHaps) +pars <- Parameters() +fwd <- MakeForwardTable(pars) +bck <- MakeBackwardTable(pars) +Iter <- ForwardIterator(2) +for(t in targets(Iter)){ + Iter(fwd,pars,t) + Backward(bck,pars,t) + print(paste("Mean Distance at locus",t,"is",mean(DistMat(fwd,bck)))) +} +} + +} +\seealso{ +\code{\link{MakeForwardTable}} to create a \code{kalisForwardTable}. 
+} diff --git a/man/ForwardUsingTableCache.Rd b/man/ForwardUsingTableCache.Rd new file mode 100644 index 0000000..87b26e9 --- /dev/null +++ b/man/ForwardUsingTableCache.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TableCache.R +\name{ForwardUsingTableCache} +\alias{ForwardUsingTableCache} +\title{Use a forward table cache to propagate} +\usage{ +ForwardUsingTableCache( + fwd, + pars, + cache, + t = fwd$l - 1, + nthreads = min(parallel::detectCores(logical = FALSE), fwd$to_recipient - + fwd$from_recipient + 1) +) +} +\arguments{ +\item{fwd}{a \code{kalisForwardTable} object, as returned by +\code{\link{MakeForwardTable}}.} + +\item{pars}{a \code{kalisParameters} object, as returned by +\code{Parameters}.} + +\item{cache}{a cache of forward tables as generated by +\code{\link{CreateForwardTableCache}} and filled using +\code{\link{FillTableCache}}.} + +\item{t}{a locus position to move the forward table to, starting the forward +propagation from whatever table in the \code{cache} variable is immediately +before locus \code{t}. +By default, it simply advances to the previous locus (which is the natural +direction to move when using the cache).} + +\item{nthreads}{the number of CPU cores to use. +By default, the smaller of the number of physical CPU cores and the number of recipient haplotypes in \code{fwd}.} +} +\value{ +There is nothing returned. +For performance reasons, \code{fwd} is updated in-place. +} +\description{ +An in-memory cache for forward tables, which has already been filled, can be +used to move more quickly to a specified locus. +} +\details{ +If the objective is to run the Li and Stephens hidden Markov model both +forwards and backwards to the same locus (and to do so for every possible +locus), then considerable efficiency can be achieved by first performing a +full scan forwards, filling a geometrically spaced cache whilst doing so. 
+Then, by working backwards, the backward propagation moves one locus at a +time and the forward propagation can move backwards by moving forward from a +recently cached local table. + +Memory for a cache can be allocated using +\code{\link{CreateForwardTableCache}} and should then be filled with +\code{\link{FillTableCache}}. +To use the cache, then instead of using the \code{\link{Forward}} function, +use this function. + +Note that the \code{cache} which is passed to this function will be +dynamically updated based on the locus requested: the assumption is that +the cache is used to propagate in reverse so any cache entries for a locus +position past \code{t} are taken to be no longer needed and that space will be +redeployed to more densely fill the cache with earlier locus positions. +} +\examples{ +\dontrun{ +# This code assumes you have already: +# i) cached the haplotypes using CacheHaplotypes function +# ii) setup parameters in a variable called pars +# iii) set the number of loci in a variable called L + +# Allocate up to 10GB to a cache, with parameters already setup in pars ... +cache <- CreateForwardTableCache(pars, 10) +# ... and fill it +FillTableCache(cache, pars, nthreads = 8) + +# Create forward and backward tables +fwd <- MakeForwardTable(pars) +bck <- MakeBackwardTable(pars) + +# Then reach every locus faster by iterating backwards, using the cache to +# move the forward table into position faster +for(l in L:1) { + Backward(bck, pars, l, nthreads = 8) + ForwardUsingTableCache(fwd, pars, cache, l, nthreads = 8) + # Do whatever work is required at + # every locus here using fwd and bck +} +} + +} +\seealso{ +\code{\link{MakeForwardTable}} to make a forward table; +\code{\link{CreateForwardTableCache}} to generate a cache; +\code{\link{FillTableCache}} to fill a cache; +\code{\link{Forward}} for forward function without using a cache. 
+} diff --git a/man/Neighbors.Rd b/man/Neighbors.Rd new file mode 100644 index 0000000..3aac88f --- /dev/null +++ b/man/Neighbors.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Clades.R +\name{Neighbors} +\alias{Neighbors} +\title{Neighbors} +\usage{ +Neighbors(x, use.forking = FALSE, nthreads = 1L) +} +\arguments{ +\item{x}{a \code{kalisClades} object returned by \code{\link{kalisClades}}} + +\item{use.forking}{a logical, should forked processes be used?} + +\item{nthreads}{the number of CPU cores to use. Currently, no parallelism is used.} +} +\value{ +a \code{kalisNeighbors} encoding the nearest neighbors for each recipient haplotype +} +\description{ +Utility for calling tied nearest neighbors for each recipient haplotype +} diff --git a/man/PruneCladeMat.Rd b/man/PruneCladeMat.Rd new file mode 100644 index 0000000..2cd1b48 --- /dev/null +++ b/man/PruneCladeMat.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Clades.R +\name{PruneCladeMat} +\alias{PruneCladeMat} +\title{PruneCladeMat} +\usage{ +PruneCladeMat(M, neigh, sprigs, prune = "singleton.info", from.recipient = 1L) +} +\description{ +PruneCladeMat +} diff --git a/man/Sprigs.Rd b/man/Sprigs.Rd new file mode 100644 index 0000000..c7afd3b --- /dev/null +++ b/man/Sprigs.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Clades.R +\name{Sprigs} +\alias{Sprigs} +\title{Sprigs} +\usage{ +Sprigs(x, old.sprigs = FALSE) +} +\description{ +Sprigs +} diff --git a/man/Sprigs_old.Rd b/man/Sprigs_old.Rd new file mode 100644 index 0000000..d8dc6f0 --- /dev/null +++ b/man/Sprigs_old.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Clades.R +\name{Sprigs_old} +\alias{Sprigs_old} +\title{Sprigs} +\usage{ +Sprigs_old(x, use.forking = FALSE, nthreads = 1L, add.self = TRUE) +} +\arguments{ +\item{x}{a \code{kalisNeighbors} 
object returned by \code{\link{kalisNeighbors}}, a \code{kalisClades} object returned by \code{\link{kalisClades}} with \code{neighbors = TRUE}, or a list} + +\item{use.forking}{a logical, should forked processes be used?} + +\item{nthreads}{the number of CPU cores to use. Currently, no parallelism is used.} +} +\value{ +a \code{kalisSprigs} object assigning each haplotype to a sprig +} +\description{ +Utility for calling sprigs from probabilistic clades +}