From 7a0be7a8eae5743c5ac6e50ab910e896adffb45a Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Wed, 20 Mar 2024 11:09:33 -0700 Subject: [PATCH] Update man pages for extreme p-value filtering --- man/admixMap.Rd | 6 ++++-- man/assocTestAggregate.Rd | 5 ++++- man/assocTestSingle.Rd | 14 ++++++++------ man/fitNullModel.Rd | 3 +++ man/jointScoreTest.Rd | 4 ++++ 5 files changed, 23 insertions(+), 9 deletions(-) diff --git a/man/admixMap.Rd b/man/admixMap.Rd index a7b70b7f..5e351847 100644 --- a/man/admixMap.Rd +++ b/man/admixMap.Rd @@ -7,7 +7,7 @@ Run admixture analyses } \usage{ admixMap(admixDataList, null.model, male.diploid=TRUE, - genome.build=c("hg19", "hg38"), + genome.build=c("hg19", "hg38"), BPPARAM=bpparam(), verbose=TRUE) } @@ -42,6 +42,8 @@ See the example for how one might set up the \code{admixDataList} object. List n \item{Joint.pval}{The Wald p-value for the joint test of all local ancestry terms} } +p-values that are calculated using \code{pchisq} and are smaller than \code{.Machine$double.xmin} are set to \code{.Machine$double.xmin}. + \author{Matthew P. Conomos, Lisa Brown, Stephanie M. 
Gogarten, Tamar Sofer, Ken Rice, Chaoyu Yu} \seealso{\code{\link{GenotypeIterator}}, \code{\link{fitNullModel}}, \code{\link{assocTestSingle}}} @@ -77,7 +79,7 @@ genoIterators <- lapply(genoDataList[1:2], GenotypeBlockIterator) null.model <- fitNullModel(scanAnnot, outcome="pheno", covars="covar") # run the association test -myassoc <- admixMap(genoIterators, null.model, +myassoc <- admixMap(genoIterators, null.model, BPPARAM=BiocParallel::SerialParam()) head(myassoc) diff --git a/man/assocTestAggregate.Rd b/man/assocTestAggregate.Rd index ceaf5036..99751568 100644 --- a/man/assocTestAggregate.Rd +++ b/man/assocTestAggregate.Rd @@ -117,7 +117,7 @@ \item{Score_burden}{The value of the score function for the burden test} \item{Score.SE_burden}{The estimated standard error of the Score for the burden test} \item{Stat_burden}{The score Z test statistic for the burden test} - \item{pval_burden}{The burden test p-value} + \item{pval_burden}{The burden test p-value.} \item{Q_theta}{The test statistic for the adjusted SKAT test (which is asymptotically independent of the burden test)} \item{pval_theta}{The p-value of the adjusted SKAT test (which is asymptotically independent of the burden test)} \item{pval_SMMAT}{The SMMAT p-value after combining pval_burden and pval_theta using Fisher's method.} @@ -147,6 +147,9 @@ \item{freq}{The estimated effect allele frequency} \item{MAC}{The minor allele count. For multiallelic variants, "minor" is determined by comparing the count of the allele specified by \code{allele.index} with the sum of all other alleles.} \item{weight}{The weight assigned to the variant in the analysis.} + +p-values that are calculated using \code{pchisq} and are smaller than \code{.Machine$double.xmin} are set to \code{.Machine$double.xmin}. + } \author{Matthew P. Conomos, Stephanie M. 
Gogarten, Thomas Lumley, Tamar Sofer, Ken Rice, Chaoyu Yu, Han Chen} diff --git a/man/assocTestSingle.Rd b/man/assocTestSingle.Rd index dd761450..1bbe4108 100644 --- a/man/assocTestSingle.Rd +++ b/man/assocTestSingle.Rd @@ -13,7 +13,7 @@ \S4method{assocTestSingle}{SeqVarIterator}(gdsobj, null.model, test=c("Score", "Score.SPA", "BinomiRare", "CMP"), recalc.pval.thresh=0.05, fast.score.SE=FALSE, - GxE=NULL, + GxE=NULL, geno.coding=c("additive", "dominant", "recessive"), sparse=TRUE, imputed=FALSE, male.diploid=TRUE, genome.build=c("hg19", "hg38"), @@ -47,14 +47,14 @@ \code{assocTestSingle} uses the \code{\link{BiocParallel}} package to process iterator chunks in parallel. See the \code{\link{BiocParallel}} documentation for more information on the default behaviour of \code{\link{bpparam}} and how to register different parallel backends. If serial execution is desired, set \code{BPPARAM=BiocParallel::SerialParam()}. Note that parallel execution requires more RAM than serial execution. All samples included in \code{null model} will be included in the association test, even if a different set of samples is present in the current filter for \code{gdsobj}. - + The effect size estimate is for each copy of the alternate allele (when \code{gdsobj} is a \code{\link{SeqVarIterator}} object) or the "A" allele (when \code{gdsobj} is a \code{\link{GenotypeIterator}} object). We refer to this as the "effect allele" in the rest of the documentation. For multiallelic variants in \code{\link{SeqVarIterator}} objects, each alternate (or "A") allele is tested separately. %When \code{impute.geno} is TRUE, sporadic missing genotype values are mean imputed using the minor allele frequency (MAF) calculated on all other samples at that SNP. 
When \code{impute.geno} is FALSE, samples with missing values for all of the SNP genotypes in the current SNP block are removed from the analysis for the block; this may significantly slow down computation time because many pre-computed matrices need to be re-computed each time the sample set changes. Also note: when \code{impute.geno} is FALSE, sporadic missingness for a sample inside of a SNP block will lead to an error. Sporadic missing genotype values are mean imputed using the allele frequency calculated on all other samples at that variant. Monomorphic variants (including variants where every sample is a heterozygote) are omitted from the results. - + The input \code{GxE} can be used to perform GxE tests. Multiple interaction variables may be specified, but all interaction variables specified must have been included as covariates in fitting the null model with \code{fitNullModel}. When performing GxE analyses, \code{assocTestSingle} will report two tests: (1) the joint Wald test of all genotype interaction terms in the model (this is the test for any genotype interaction effect), and (2) the joint Wald test of the genotype term along with all of the genotype interaction terms (this is the test for any genetic effect). Individual genotype interaction terms can be tested by creating test statistics from the reported effect size estimates and their standard errors (Note: when \code{GxE} contains a single continuous or binary covariate, this test is the same as the test for any genotype interaction effect mentioned above). %In order to test more complex hypotheses regarding subsets of multiple genotype interaction terms, \code{ivar.return.betaCov} can be used to retrieve the estimated covariance matrix of the effect size estimates. The saddle point approximation (SPA), run by using \code{test = "Score.SPA"}, implements the method described by Dey et al. (2017), which was extended to mixed models by Zhou et al. (2018) in their SAIGE software. 
SPA provides better calibration of p-values when fitting mixed models with a binomial family for a sample with an imbalanced case to control ratio. @@ -89,7 +89,7 @@ \item{PVE}{An approximation of the proportion of phenotype variance explained} % If \code{test} is \code{"Wald"} and \code{GxE} is \code{NULL}: % \item{Est}{The effect size estimate for each additional copy of the effect allele} - % \item{Est.SE}{The estimated standard error of the effect size estimate} + % \item{Est.SE}{The estimated standard error of the effect size estimate} % \item{Wald.Stat}{The Wald Z test statistic} % \item{Wald.pval}{The Wald p-value} If \code{test} is \code{"Score.SPA"}: @@ -109,10 +109,12 @@ \item{n.D.carrier}{Number of cases with at least one copy of the effect allele} \item{pval}{p-value} \item{mid.pval}{mid-p-value} - + %When \code{GxE} is not \code{NULL}, if \code{ivar.return.betaCov} is \code{TRUE}, then the output is a list with two elements. The first, "results", is the data.frame described above. The second, "betaCov", is a list with length equal to the number of rows of "results", where each element of the list is the covariance matrix of the effect size estimates (betas) for the genotype and genotype interaction terms. } +p-values that are calculated using \code{pchisq} and are smaller than \code{.Machine$double.xmin} are set to \code{.Machine$double.xmin}. + \references{ Dey, R., Schmidt, E. M., Abecasis, G. R., & Lee, S. (2017). A fast and accurate algorithm to test for binary phenotypes and its application to PheWAS. The American Journal of Human Genetics, 101(1), 37-49. 
@@ -157,7 +159,7 @@ nullmod <- fitNullModel(iterator, outcome="outcome", covars="sex") # run the association test assoc <- assocTestSingle(iterator, nullmod, BPPARAM=BiocParallel::SerialParam()) - + # use fast score SE for a null model with a covariance matrix seqResetFilter(seqData) grm <- SNPRelate::snpgdsGRM(seqData, verbose=FALSE) diff --git a/man/fitNullModel.Rd b/man/fitNullModel.Rd index ae812549..6a18cd31 100644 --- a/man/fitNullModel.Rd +++ b/man/fitNullModel.Rd @@ -197,6 +197,9 @@ The \code{score.table} data frame contains the following columns: \item{Score.SE.fast}{The estimated fast standard error of the Score (before scalar correction)} \item{se.ratio}{The ratio of Score.SE to Score.SE.fast; these values are averaged across varaints to estimate \code{se.correction} in \code{nullModelFastScore}.} } + +p-values that are calculated using \code{pchisq} and are smaller than \code{.Machine$double.xmin} are set to \code{.Machine$double.xmin}. + } \references{ diff --git a/man/jointScoreTest.Rd b/man/jointScoreTest.Rd index 9f31e7a6..3149be14 100644 --- a/man/jointScoreTest.Rd +++ b/man/jointScoreTest.Rd @@ -31,6 +31,10 @@ jointScoreTest(null.model, G) \item{fixef}{A data.frame with joint effect size estimates (Est), standard errors (SE), chi-squared test statistics (Stat), p-values (pval), and estimated proportion of variance explained (PVE) for each of the variants specified in \code{G}.} \item{betaCov}{Estimated covariance matrix for the variants in \code{G}.} } + +p-values that are calculated using \code{pchisq} and are smaller than \code{.Machine$double.xmin} are set to \code{.Machine$double.xmin}. + + %\references{ %}