diff --git a/R/CladeMat.R b/R/CladeMat.R index 2e1a601..ad8c6a7 100644 --- a/R/CladeMat.R +++ b/R/CladeMat.R @@ -1,7 +1,42 @@ -#' Fast Clade Matrix Construction +#' Fast clade matrix construction +#' +#' Constructs a clade matrix using forward and backward tables. +#' The clade matrix captures genetic relatedness information in the distances from the Li & Stephens model that are not captured in the called clades. +#' +#' `CladeMat()` uses the forward and backward tables to construct the corresponding clade matrix which can then be tested, for example using a standard quadratic form score statistic. +#' +#' @references +#' Christ, R., Wang, X., Aslett, L.J.M., Steinsaltz, D. and Hall, I. (2024) "Clade Distillation for Genome-wide Association Studies." bioRxiv 2024.09.30.615852. Available at: \doi{10.1101/2024.09.30.615852}. +#' +#' @param fwd +#' a `kalisForwardTable` object, as returned by [MakeForwardTable()] and propagated to a target variant by [Forward()]. +#' This table must be at the same variant location as argument `bck`. +#' @param bck +#' a `kalisBackwardTable` object, as returned by [MakeBackwardTable()] and propagated to a target variant by [Backward()]. +#' This table must be at the same variant location as argument `fwd`. +#' @param M +#' a matrix with half the number of rows and columns as the corresponding forward/backward tables. +#' This matrix is overwritten in place with the clade matrix result for performance reasons. +#' @param unit.dist +#' TODO +#' @param thresh +#' TODO +#' @param max1var +#' TODO +#' @param nthreads +#' the number of CPU cores to use. +#' By default uses the `parallel` package to detect the number of physical cores. +#' +#' @return +#' TODO: this returns from C the list containing neighbourhood, similarity and total clades info, but need full details and list structure. +#' +#' @examples +#' # TODO +#' +#' #' @export CladeMat CladeMat <- function(fwd, bck, M, unit.dist, thresh = 0.2, max1var = FALSE, - nthreads = min(parallel::detectCores(logical = FALSE), fwd$to_recipient-fwd$from_recipient+1)){ + nthreads = min(parallel::detectCores(logical = FALSE), fwd$to_recipient-fwd$from_recipient+1)){ # input checks ######################### diff --git a/R/Clades.R b/R/Clades.R index 0a29988..53c1d33 100644 --- a/R/Clades.R +++ b/R/Clades.R @@ -199,14 +199,38 @@ PruneCladeMat <- function(M, neigh, sprigs, prune = "singleton.info", from.recip #' Probabilistic Clades #' #' Utility for calling probabilistic clades at, in between, or excluding variants. -#' @param fwd a forward table as returned by [MakeForwardTable()] -#' @param bck a backward table as returned by [MakeBackwardTable()] -#' @param pars a `kalisParameters` object, as returned by [Parameters()]. -#' @param beta.theta.opts a list; see Details for [DistMat()]. -#' @param safety.checks a logical, should safety checks be applied to the distances? See [DistMat()]. -#' @param neighbors a logical, should nearest neighbors be pre-calculated? See [Neighbors()]. -#' @param use.forking a logical, should forked processes be used? -#' @param nthreads the number of CPU cores to use. Currently, no parallelism is used. +#' +#' TODO longer description +#' +#' @references +#' Christ, R., Wang, X., Aslett, L.J.M., Steinsaltz, D. and Hall, I. (2024) "Clade Distillation for Genome-wide Association Studies." bioRxiv 2024.09.30.615852. Available at: \doi{10.1101/2024.09.30.615852}. +#' +#' @param fwd +#' a `kalisForwardTable` object, as returned by [MakeForwardTable()] and propagated to a target variant by [Forward()]. +#' This table must be at the same variant location as argument `bck`. +#' @param bck +#' a `kalisBackwardTable` object, as returned by [MakeBackwardTable()] and propagated to a target variant by [Backward()]. +#' This table must be at the same variant location as argument `fwd`. +#' @param pars +#' a `kalisParameters` object, as returned by [Parameters()]. +#' @param beta.theta.opts +#' a list; see Details in [DistMat()] documentation page. +#' @param safety.checks +#' a logical, should safety checks be applied to the distances? +#' See [DistMat()]. +#' @param neighbors +#' a logical, should nearest neighbors be pre-calculated? +#' See [Neighbors()]. +#' @param use.forking +#' a logical, should forked processes be used? +#' @param forking.chunk.size +#' TODO +#' @param mc.preschedule +#' TODO +#' @param nthreads +#' the number of CPU cores to use. +#' By default no parallelism is used. +#' #' @return #' a `kalisClades` object encoding probabilistic clade calls #' diff --git a/R/TableCache.R b/R/TableCache.R index 5de70dc..98287f1 100644 --- a/R/TableCache.R +++ b/R/TableCache.R @@ -16,16 +16,27 @@ #' To use the cache, then instead of using the [Forward()] function, #' use [ForwardUsingTableCache()]. #' -#' @param pars a `kalisParameters` object, as returned by [Parameters()]. -#' @param size the maximum amount of RAM (in GB) to devote to this cache. -#' @param from_recipient first recipient haplotype if creating a partial forward -#' table cache. By default includes from the first recipient haplotype. -#' @param to_recipient last recipient haplotype if creating a partial forward -#' table cache. By default includes to the last recipient haplotype. +#' @references +#' Christ, R., Wang, X., Aslett, L.J.M., Steinsaltz, D. and Hall, I. (2024) "Clade Distillation for Genome-wide Association Studies." bioRxiv 2024.09.30.615852. Available at: \doi{10.1101/2024.09.30.615852}. +#' +#' @param pars +#' a `kalisParameters` object, as returned by [Parameters()]. +#' @param size +#' the maximum amount of RAM (in GB) to devote to this cache. +#' By default, 1GB. +#' @param from_recipient +#' first recipient haplotype if creating a partial forward table cache. +#' By default includes from the first recipient haplotype. +#' @param to_recipient +#' last recipient haplotype if creating a partial forward table cache. +#' By default includes to the last recipient haplotype. +#' @param max.tables +#' positive integer indicating the maximum number of tables to use in the cache. +#' Both this and `size` will be honoured, so the number of tables may be smaller than this. +#' By default, equals \eqn{\lfloor\log_2(L)\rfloor}. #' #' @return -#' A list of forward tables representing a cache and ready to be filled is -#' returned. +#' A list of forward tables representing a cache and ready to be filled is returned. #' #' @seealso #' [MakeForwardTable()] to make a forward table; @@ -128,18 +139,13 @@ CreateForwardTableCache <- function(pars, size = 1, from_recipient = 1, to_recip #' To use the cache, then instead of using the [Forward()] function, #' use [ForwardUsingTableCache()]. #' -#' @param cache a cache of forward tables as generated by -#' [CreateForwardTableCache()] -#' @param pars a `kalisParameters` object, as returned by [Parameters()]. -#' @param from the first locus which the geometrically spaced cache should be -#' built from. -#' By default the whole sequence length will be cached so this defaults to 1. -#' @param to the last locus up to which the geometrically spaced cache should be -#' built. -#' By default the whole sequence length will be cached so this defaults to -#' `Inf`. -#' @param nthreads the number of CPU cores to use. -#' By default no parallelism is used. +#' @param cache +#' a cache of forward tables as generated by [CreateForwardTableCache()]. +#' @param pars +#' a `kalisParameters` object, as returned by [Parameters()]. +#' @param nthreads +#' the number of CPU cores to use. +#' By default uses the `parallel` package to detect the number of physical cores. #' #' @return #' There is nothing returned. diff --git a/man/CladeMat.Rd b/man/CladeMat.Rd index 7eed863..86af088 100644 --- a/man/CladeMat.Rd +++ b/man/CladeMat.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/CladeMat.R \name{CladeMat} \alias{CladeMat} -\title{Fast Clade Matrix Construction} +\title{Fast clade matrix construction} \usage{ CladeMat( fwd, @@ -15,6 +15,40 @@ CladeMat( fwd$from_recipient + 1) ) } +\arguments{ +\item{fwd}{a \code{kalisForwardTable} object, as returned by \code{\link[=MakeForwardTable]{MakeForwardTable()}} and propagated to a target variant by \code{\link[=Forward]{Forward()}}. +This table must be at the same variant location as argument \code{bck}.} + +\item{bck}{a \code{kalisBackwardTable} object, as returned by \code{\link[=MakeBackwardTable]{MakeBackwardTable()}} and propagated to a target variant by \code{\link[=Backward]{Backward()}}. +This table must be at the same variant location as argument \code{fwd}.} + +\item{M}{a matrix with half the number of rows and columns as the corresponding forward/backward tables. +This matrix is overwritten in place with the clade matrix result for performance reasons.} + +\item{unit.dist}{TODO} + +\item{thresh}{TODO} + +\item{max1var}{TODO} + +\item{nthreads}{the number of CPU cores to use. +By default uses the \code{parallel} package to detect the number of physical cores.} +} +\value{ +TODO: this returns from C the list containing neighbourhood, similarity and total clades info, but need full details and list structure. +} \description{ -Fast Clade Matrix Construction +Constructs a clade matrix using forward and backward tables. +The clade matrix captures genetic relatedness information in the distances from the Li & Stephens model that are not captured in the called clades. +} +\details{ +\code{CladeMat()} uses the forward and backward tables to construct the corresponding clade matrix which can then be tested, for example using a standard quadratic form score statistic. +} +\examples{ +# TODO + + +} +\references{ +Christ, R., Wang, X., Aslett, L.J.M., Steinsaltz, D. and Hall, I. (2024) "Clade Distillation for Genome-wide Association Studies." bioRxiv 2024.09.30.615852. Available at: \doi{10.1101/2024.09.30.615852}. } diff --git a/man/Clades.Rd b/man/Clades.Rd index b5552c5..8e8dee7 100644 --- a/man/Clades.Rd +++ b/man/Clades.Rd @@ -18,21 +18,30 @@ Clades( ) } \arguments{ -\item{fwd}{a forward table as returned by \code{\link[=MakeForwardTable]{MakeForwardTable()}}} +\item{fwd}{a \code{kalisForwardTable} object, as returned by \code{\link[=MakeForwardTable]{MakeForwardTable()}} and propagated to a target variant by \code{\link[=Forward]{Forward()}}. +This table must be at the same variant location as argument \code{bck}.} -\item{bck}{a backward table as returned by \code{\link[=MakeBackwardTable]{MakeBackwardTable()}}} +\item{bck}{a \code{kalisBackwardTable} object, as returned by \code{\link[=MakeBackwardTable]{MakeBackwardTable()}} and propagated to a target variant by \code{\link[=Backward]{Backward()}}. +This table must be at the same variant location as argument \code{fwd}.} \item{pars}{a \code{kalisParameters} object, as returned by \code{\link[=Parameters]{Parameters()}}.} -\item{beta.theta.opts}{a list; see Details for \code{\link[=DistMat]{DistMat()}}.} +\item{beta.theta.opts}{a list; see Details in \code{\link[=DistMat]{DistMat()}} documentation page.} -\item{safety.checks}{a logical, should safety checks be applied to the distances? See \code{\link[=DistMat]{DistMat()}}.} +\item{safety.checks}{a logical, should safety checks be applied to the distances? +See \code{\link[=DistMat]{DistMat()}}.} -\item{neighbors}{a logical, should nearest neighbors be pre-calculated? See \code{\link[=Neighbors]{Neighbors()}}.} +\item{neighbors}{a logical, should nearest neighbors be pre-calculated? +See \code{\link[=Neighbors]{Neighbors()}}.} \item{use.forking}{a logical, should forked processes be used?} -\item{nthreads}{the number of CPU cores to use. Currently, no parallelism is used.} +\item{forking.chunk.size}{TODO} + +\item{mc.preschedule}{TODO} + +\item{nthreads}{the number of CPU cores to use. +By default no parallelism is used.} } \value{ a \code{kalisClades} object encoding probabilistic clade calls @@ -40,3 +49,9 @@ a \code{kalisClades} object encoding probabilistic clade calls \description{ Utility for calling probabilistic clades at, in between, or excluding variants. } +\details{ +TODO longer description +} +\references{ +Christ, R., Wang, X., Aslett, L.J.M., Steinsaltz, D. and Hall, I. (2024) "Clade Distillation for Genome-wide Association Studies." bioRxiv 2024.09.30.615852. Available at: \doi{10.1101/2024.09.30.615852}. +} diff --git a/man/CreateForwardTableCache.Rd b/man/CreateForwardTableCache.Rd index 865844d..4914e47 100644 --- a/man/CreateForwardTableCache.Rd +++ b/man/CreateForwardTableCache.Rd @@ -15,17 +15,21 @@ CreateForwardTableCache( \arguments{ \item{pars}{a \code{kalisParameters} object, as returned by \code{\link[=Parameters]{Parameters()}}.} -\item{size}{the maximum amount of RAM (in GB) to devote to this cache.} +\item{size}{the maximum amount of RAM (in GB) to devote to this cache. +By default, 1GB.} -\item{from_recipient}{first recipient haplotype if creating a partial forward -table cache. By default includes from the first recipient haplotype.} +\item{from_recipient}{first recipient haplotype if creating a partial forward table cache. +By default includes from the first recipient haplotype.} -\item{to_recipient}{last recipient haplotype if creating a partial forward -table cache. By default includes to the last recipient haplotype.} +\item{to_recipient}{last recipient haplotype if creating a partial forward table cache. +By default includes to the last recipient haplotype.} + +\item{max.tables}{positive integer indicating the maximum number of tables to use in the cache. +Both this and \code{size} will be honoured, so the number of tables may be smaller than this. +By default, equals \eqn{\lfloor\log_2(L)\rfloor}.} } \value{ -A list of forward tables representing a cache and ready to be filled is -returned. +A list of forward tables representing a cache and ready to be filled is returned. } \description{ Create an in-memory cache for forward tables to improve efficiency when @@ -71,6 +75,9 @@ for(l in L:1) { } } +} +\references{ +Christ, R., Wang, X., Aslett, L.J.M., Steinsaltz, D. and Hall, I. (2024) "Clade Distillation for Genome-wide Association Studies." bioRxiv 2024.09.30.615852. Available at: \doi{10.1101/2024.09.30.615852}. } \seealso{ \code{\link[=MakeForwardTable]{MakeForwardTable()}} to make a forward table; diff --git a/man/FillTableCache.Rd b/man/FillTableCache.Rd index dc3865a..531682a 100644 --- a/man/FillTableCache.Rd +++ b/man/FillTableCache.Rd @@ -12,22 +12,12 @@ FillTableCache( ) } \arguments{ -\item{cache}{a cache of forward tables as generated by -\code{\link[=CreateForwardTableCache]{CreateForwardTableCache()}}} +\item{cache}{a cache of forward tables as generated by \code{\link[=CreateForwardTableCache]{CreateForwardTableCache()}}.} \item{pars}{a \code{kalisParameters} object, as returned by \code{\link[=Parameters]{Parameters()}}.} \item{nthreads}{the number of CPU cores to use. -By default no parallelism is used.} - -\item{from}{the first locus which the geometrically spaced cache should be -built from. -By default the whole sequence length will be cached so this defaults to 1.} - -\item{to}{the last locus up to which the geometrically spaced cache should be -built. -By default the whole sequence length will be cached so this defaults to -\code{Inf}.} +By default uses the \code{parallel} package to detect the number of physical cores.} } \value{ There is nothing returned.