From 20a647ef3b0471e84765fc117b2d59d8727abc27 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Fri, 4 Mar 2022 07:37:35 -0600 Subject: [PATCH 01/20] new 2nd moment of truncated normal function. only works w scalar inputs for now --- R/truncnorm.R | 225 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 160 insertions(+), 65 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 4dfe62a..52daaf9 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -118,72 +118,167 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { #' #' @export #' +# my_e2truncnorm = function(a, b, mean = 0, sd = 1) { +# do_truncnorm_argchecks(a, b) +# +# alpha = (a - mean) / sd +# beta = (b - mean) / sd +# +# # Flip alpha and beta when both are positive (as above, but the mean is +# # also recycled and flipped so that we don't have to flip back). +# flip = (alpha > 0 & beta > 0) +# flip[is.na(flip)] = FALSE +# orig.alpha = alpha +# alpha[flip] = -beta[flip] +# beta[flip] = -orig.alpha[flip] +# if (any(mean != 0)) { +# mean = rep(mean, length.out = length(alpha)) +# mean[flip] = -mean[flip] +# } +# +# pnorm.diff = logscale_sub(pnorm(beta, log.p = TRUE), pnorm(alpha, log.p = TRUE)) +# alpha.frac = alpha * exp(dnorm(alpha, log = TRUE) - pnorm.diff) +# beta.frac = beta * exp(dnorm(beta, log = TRUE) - pnorm.diff) +# +# # Create a vector or matrix of 1's with NA's in the correct places. +# if (is.matrix(alpha)) +# scaled.res = array(1, dim = dim(alpha)) +# else +# scaled.res = rep(1, length.out = length(alpha)) +# is.na(scaled.res) = is.na(flip) +# +# alpha.idx = is.finite(alpha) +# scaled.res[alpha.idx] = 1 + alpha.frac[alpha.idx] +# beta.idx = is.finite(beta) +# scaled.res[beta.idx] = scaled.res[beta.idx] - beta.frac[beta.idx] +# +# # Handle approximately equal endpoints. +# endpts.equal = is.infinite(pnorm.diff) +# scaled.res[endpts.equal] = (alpha[endpts.equal] + beta[endpts.equal])^2 / 4 +# +# # Check that the results make sense. When beta is negative, +# # beta^2 + 2 * (1 + 1 / beta^2) is an upper bound for the expected squared +# # value, and it is typically a good approximation as beta goes to -Inf. +# # When the endpoints are very close to one another, the expected squared +# # value of the uniform distribution on [alpha, beta] is a better upper +# # bound (and approximation). +# upper.bd1 = beta^2 + 2 * (1 + 1 / beta^2) +# upper.bd2 = (alpha^2 + alpha * beta + beta^2) / 3 +# upper.bd = pmin(upper.bd1, upper.bd2) +# bad.idx = (!is.na(beta) & beta < 0 +# & (scaled.res < beta^2 | scaled.res > upper.bd)) +# scaled.res[bad.idx] = upper.bd[bad.idx] +# +# res = mean^2 + 2 * mean * sd * my_etruncnorm(alpha, beta) + sd^2 * scaled.res +# # message(paste("μ^2 =",mean^2)) +# # message(paste("σ^2 =",sd^2)) +# # message(paste("tnmom2(α, β) =",scaled.res)) +# # message(paste("2μ * σ =",2 * mean * sd)) +# # message(paste("tnmean(α, β) =",my_etruncnorm(alpha, beta))) +# # +# # message(paste("μ^2 + σ^2 * tnmom2(α, β) =",mean^2 + sd^2 * scaled.res)) +# # message(paste("2μ * σ * tnmean(α, β) =",2 * mean * sd * my_etruncnorm(alpha, beta))) +# # +# # message(paste("μ^2 + 2μ * σ * tnmean(α, β)=",mean^2 + 2 * mean * sd * my_etruncnorm(alpha, beta))) +# # message(paste("σ^2 * tnmom2(α, β) =",sd^2 * scaled.res)) +# +# # Handle zero sds. +# if (any(sd == 0)) { +# a = rep(a, length.out = length(res)) +# b = rep(b, length.out = length(res)) +# mean = rep(mean, length.out = length(res)) +# +# sd.zero = (sd == 0) +# res[sd.zero & b <= mean] = b[sd.zero & b <= mean]^2 +# res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 +# res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean]^2 +# } +# +# return(res) +# } + +library(RcppFaddeeva) my_e2truncnorm = function(a, b, mean = 0, sd = 1) { - do_truncnorm_argchecks(a, b) - - alpha = (a - mean) / sd - beta = (b - mean) / sd - - # Flip alpha and beta when both are positive (as above, but the mean is - # also recycled and flipped so that we don't have to flip back). - flip = (alpha > 0 & beta > 0) - flip[is.na(flip)] = FALSE - orig.alpha = alpha - alpha[flip] = -beta[flip] - beta[flip] = -orig.alpha[flip] - if (any(mean != 0)) { - mean = rep(mean, length.out = length(alpha)) - mean[flip] = -mean[flip] - } - - pnorm.diff = logscale_sub(pnorm(beta, log.p = TRUE), pnorm(alpha, log.p = TRUE)) - alpha.frac = alpha * exp(dnorm(alpha, log = TRUE) - pnorm.diff) - beta.frac = beta * exp(dnorm(beta, log = TRUE) - pnorm.diff) - - # Create a vector or matrix of 1's with NA's in the correct places. - if (is.matrix(alpha)) - scaled.res = array(1, dim = dim(alpha)) - else - scaled.res = rep(1, length.out = length(alpha)) - is.na(scaled.res) = is.na(flip) - - alpha.idx = is.finite(alpha) - scaled.res[alpha.idx] = 1 + alpha.frac[alpha.idx] - beta.idx = is.finite(beta) - scaled.res[beta.idx] = scaled.res[beta.idx] - beta.frac[beta.idx] - - # Handle approximately equal endpoints. - endpts.equal = is.infinite(pnorm.diff) - scaled.res[endpts.equal] = (alpha[endpts.equal] + beta[endpts.equal])^2 / 4 - - # Check that the results make sense. When beta is negative, - # beta^2 + 2 * (1 + 1 / beta^2) is an upper bound for the expected squared - # value, and it is typically a good approximation as beta goes to -Inf. - # When the endpoints are very close to one another, the expected squared - # value of the uniform distribution on [alpha, beta] is a better upper - # bound (and approximation). - upper.bd1 = beta^2 + 2 * (1 + 1 / beta^2) - upper.bd2 = (alpha^2 + alpha * beta + beta^2) / 3 - upper.bd = pmin(upper.bd1, upper.bd2) - bad.idx = (!is.na(beta) & beta < 0 - & (scaled.res < beta^2 | scaled.res > upper.bd)) - scaled.res[bad.idx] = upper.bd[bad.idx] - - res = mean^2 + 2 * mean * sd * my_etruncnorm(alpha, beta) + sd^2 * scaled.res - - # Handle zero sds. - if (any(sd == 0)) { - a = rep(a, length.out = length(res)) - b = rep(b, length.out = length(res)) - mean = rep(mean, length.out = length(res)) - - sd.zero = (sd == 0) - res[sd.zero & b <= mean] = b[sd.zero & b <= mean]^2 - res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 - res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean]^2 - } - - return(res) + # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl + do_truncnorm_argchecks(a, b) + + if ((mean == 0) && (sd ==1)){ + #standard normal distribution + if (a == b){ + # point mass + # ⟹ 2nd moment is point-value squared + return(a^2) + } + else if (abs(a) > abs(b)) { + # forces a and b to satisfy b ≥ 0, |a| ≤ |b| + return(my_e2truncnorm(-b, -a)) + } + else if (is.infinite(a) && is.infinite(b)){ + # untruncated normal distribution + # ⟹ 2nd moment is 1 + return(1.0) + } + else if (is.infinite(b)) { + # truncated to [a,∞) + # ⟹ 2nd moment simplifies to 1 + aϕ(a)/(1 - Φ(a)) + m2 = 1 + sqrt(2 / pi) * a / Re(erfcx(a / sqrt(2))) + stopifnot(a^2 <= m2) + return(m2) + } + + #now everything is finite, b ≥ 0, and |a| ≤ |b|, so + # either a ≤ 0 ≤ b or 0 ≤ a ≤ b + stopifnot(a < b, b < Inf, abs(a) <= abs(b)) + stopifnot(((a <= 0) && (0 <= b)) || ((0 <= a) && (a <= b))) + + if ((a <= 0) && (0 <= b)){ + # a ≤ 0 ≤ b + #catestrophic cancellation is less of an issue + ea = sqrt(pi/2) * Re(erf(a / sqrt(2))) + eb = sqrt(pi/2) * Re(erf(b / sqrt(2))) + fa = ea - a * exp(-a^2 / 2) + fb = eb - b * exp(-b^2 / 2) + m2 = (fb - fa) / (eb - ea) + stopifnot(fb >= fa, eb >= ea) + stopifnot(0 <= m2, m2 <= 1) + return(m2) + } + else { + # 0 ≤ a ≤ b + #strategically avoid catestrophic cancellation as much as possible + exdiff = exp((a - b)*(a + b)/2) + ea = sqrt(pi/2) * Re(erfcx(a / sqrt(2))) + eb = sqrt(pi/2) * Re(erfcx(b / sqrt(2))) + fa = ea + a + fb = eb + b + m2 = (fa - fb * exdiff) / (ea - eb * exdiff) + stopifnot(a^2 <= m2, m2 <= b^2) + return(m2) + } + } + else if (sd > 0){ + #nonstandard normal + alpha = (a - mean) / sd + beta = (b - mean) / sd + # TODO potential for catestrophic cancellation here... is there a better way? + return(mean^2 + sd^2 * my_e2truncnorm(alpha, beta) + 2 * mean * sd * my_etruncnorm(alpha, beta)) + } + else { + #point mass + # TODO potential error in original the julia code?? + # i wrote what I think it should be, but that doesn't agree with + # the original code + if ((a <= mean) && (a <= b)) { + # ⟹ if mean ∈ [a,b], 2nd moment is mean^2 + return(mean^2) + } else if (mean < a){ + # ⟹ if mean < a, 2nd moment is a^2 + return(a^2) + } else if (mean > b){ + # ⟹ if mean > a, 2nd moment is b^2 + return(b^2) + } + } } #' @title Variance of Truncated Normal From d877bd7112da5bc0072b59c990e24fce9ba87ccf Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Fri, 4 Mar 2022 07:41:05 -0600 Subject: [PATCH 02/20] remove old code --- R/truncnorm.R | 105 -------------------------------------------------- 1 file changed, 105 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 52daaf9..c9a7645 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -92,111 +92,6 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { return(res) } -#' @title Expected Squared Value of Truncated Normal -#' -#' @description Computes the expected squared values of truncated normal -#' distributions with parameters \code{a}, \code{b}, \code{mean}, and -#' \code{sd}. Arguments can be scalars, vectors, or matrices. Arguments of -#' shorter length will be recycled according to the usual recycling rules, -#' but \code{a} and \code{b} must have the same length. Missing values are -#' accepted for all arguments. -#' -#' @inheritParams my_etruncnorm -#' -#' @param sd The standard deviation of the untruncated normal. Standard -#' deviations of zero are interpreted as numerically (rather than exactly) -#' zero, so that the square of the untruncated mean is returned if it lies -#' within \code{[a, b]} and the square of the nearer of \code{a} and -#' \code{b} is returned otherwise. -#' -#' @return The expected squared values of truncated normal -#' distributions with parameters \code{a}, \code{b}, \code{mean}, and -#' \code{sd}. If any of the arguments is a matrix, then a matrix will -#' be returned. -#' -#' @seealso \code{\link{my_etruncnorm}}, \code{\link{my_vtruncnorm}} -#' -#' @export -#' -# my_e2truncnorm = function(a, b, mean = 0, sd = 1) { -# do_truncnorm_argchecks(a, b) -# -# alpha = (a - mean) / sd -# beta = (b - mean) / sd -# -# # Flip alpha and beta when both are positive (as above, but the mean is -# # also recycled and flipped so that we don't have to flip back). -# flip = (alpha > 0 & beta > 0) -# flip[is.na(flip)] = FALSE -# orig.alpha = alpha -# alpha[flip] = -beta[flip] -# beta[flip] = -orig.alpha[flip] -# if (any(mean != 0)) { -# mean = rep(mean, length.out = length(alpha)) -# mean[flip] = -mean[flip] -# } -# -# pnorm.diff = logscale_sub(pnorm(beta, log.p = TRUE), pnorm(alpha, log.p = TRUE)) -# alpha.frac = alpha * exp(dnorm(alpha, log = TRUE) - pnorm.diff) -# beta.frac = beta * exp(dnorm(beta, log = TRUE) - pnorm.diff) -# -# # Create a vector or matrix of 1's with NA's in the correct places. -# if (is.matrix(alpha)) -# scaled.res = array(1, dim = dim(alpha)) -# else -# scaled.res = rep(1, length.out = length(alpha)) -# is.na(scaled.res) = is.na(flip) -# -# alpha.idx = is.finite(alpha) -# scaled.res[alpha.idx] = 1 + alpha.frac[alpha.idx] -# beta.idx = is.finite(beta) -# scaled.res[beta.idx] = scaled.res[beta.idx] - beta.frac[beta.idx] -# -# # Handle approximately equal endpoints. -# endpts.equal = is.infinite(pnorm.diff) -# scaled.res[endpts.equal] = (alpha[endpts.equal] + beta[endpts.equal])^2 / 4 -# -# # Check that the results make sense. When beta is negative, -# # beta^2 + 2 * (1 + 1 / beta^2) is an upper bound for the expected squared -# # value, and it is typically a good approximation as beta goes to -Inf. -# # When the endpoints are very close to one another, the expected squared -# # value of the uniform distribution on [alpha, beta] is a better upper -# # bound (and approximation). -# upper.bd1 = beta^2 + 2 * (1 + 1 / beta^2) -# upper.bd2 = (alpha^2 + alpha * beta + beta^2) / 3 -# upper.bd = pmin(upper.bd1, upper.bd2) -# bad.idx = (!is.na(beta) & beta < 0 -# & (scaled.res < beta^2 | scaled.res > upper.bd)) -# scaled.res[bad.idx] = upper.bd[bad.idx] -# -# res = mean^2 + 2 * mean * sd * my_etruncnorm(alpha, beta) + sd^2 * scaled.res -# # message(paste("μ^2 =",mean^2)) -# # message(paste("σ^2 =",sd^2)) -# # message(paste("tnmom2(α, β) =",scaled.res)) -# # message(paste("2μ * σ =",2 * mean * sd)) -# # message(paste("tnmean(α, β) =",my_etruncnorm(alpha, beta))) -# # -# # message(paste("μ^2 + σ^2 * tnmom2(α, β) =",mean^2 + sd^2 * scaled.res)) -# # message(paste("2μ * σ * tnmean(α, β) =",2 * mean * sd * my_etruncnorm(alpha, beta))) -# # -# # message(paste("μ^2 + 2μ * σ * tnmean(α, β)=",mean^2 + 2 * mean * sd * my_etruncnorm(alpha, beta))) -# # message(paste("σ^2 * tnmom2(α, β) =",sd^2 * scaled.res)) -# -# # Handle zero sds. -# if (any(sd == 0)) { -# a = rep(a, length.out = length(res)) -# b = rep(b, length.out = length(res)) -# mean = rep(mean, length.out = length(res)) -# -# sd.zero = (sd == 0) -# res[sd.zero & b <= mean] = b[sd.zero & b <= mean]^2 -# res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 -# res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean]^2 -# } -# -# return(res) -# } - library(RcppFaddeeva) my_e2truncnorm = function(a, b, mean = 0, sd = 1) { # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl From dea7f6f139f263ca96d56fe6c38a6bb9baa1e0d5 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Fri, 4 Mar 2022 07:42:33 -0600 Subject: [PATCH 03/20] new 2nd moment of truncated normal function. only works w scalar inputs for now --- R/truncnorm.R | 146 ++++++++++++++++++++++++++++---------------------- 1 file changed, 81 insertions(+), 65 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 4dfe62a..7bd30e6 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -118,72 +118,88 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { #' #' @export #' +library(RcppFaddeeva) my_e2truncnorm = function(a, b, mean = 0, sd = 1) { - do_truncnorm_argchecks(a, b) - - alpha = (a - mean) / sd - beta = (b - mean) / sd - - # Flip alpha and beta when both are positive (as above, but the mean is - # also recycled and flipped so that we don't have to flip back). - flip = (alpha > 0 & beta > 0) - flip[is.na(flip)] = FALSE - orig.alpha = alpha - alpha[flip] = -beta[flip] - beta[flip] = -orig.alpha[flip] - if (any(mean != 0)) { - mean = rep(mean, length.out = length(alpha)) - mean[flip] = -mean[flip] - } - - pnorm.diff = logscale_sub(pnorm(beta, log.p = TRUE), pnorm(alpha, log.p = TRUE)) - alpha.frac = alpha * exp(dnorm(alpha, log = TRUE) - pnorm.diff) - beta.frac = beta * exp(dnorm(beta, log = TRUE) - pnorm.diff) - - # Create a vector or matrix of 1's with NA's in the correct places. - if (is.matrix(alpha)) - scaled.res = array(1, dim = dim(alpha)) - else - scaled.res = rep(1, length.out = length(alpha)) - is.na(scaled.res) = is.na(flip) - - alpha.idx = is.finite(alpha) - scaled.res[alpha.idx] = 1 + alpha.frac[alpha.idx] - beta.idx = is.finite(beta) - scaled.res[beta.idx] = scaled.res[beta.idx] - beta.frac[beta.idx] - - # Handle approximately equal endpoints. - endpts.equal = is.infinite(pnorm.diff) - scaled.res[endpts.equal] = (alpha[endpts.equal] + beta[endpts.equal])^2 / 4 - - # Check that the results make sense. When beta is negative, - # beta^2 + 2 * (1 + 1 / beta^2) is an upper bound for the expected squared - # value, and it is typically a good approximation as beta goes to -Inf. - # When the endpoints are very close to one another, the expected squared - # value of the uniform distribution on [alpha, beta] is a better upper - # bound (and approximation). - upper.bd1 = beta^2 + 2 * (1 + 1 / beta^2) - upper.bd2 = (alpha^2 + alpha * beta + beta^2) / 3 - upper.bd = pmin(upper.bd1, upper.bd2) - bad.idx = (!is.na(beta) & beta < 0 - & (scaled.res < beta^2 | scaled.res > upper.bd)) - scaled.res[bad.idx] = upper.bd[bad.idx] - - res = mean^2 + 2 * mean * sd * my_etruncnorm(alpha, beta) + sd^2 * scaled.res - - # Handle zero sds. - if (any(sd == 0)) { - a = rep(a, length.out = length(res)) - b = rep(b, length.out = length(res)) - mean = rep(mean, length.out = length(res)) - - sd.zero = (sd == 0) - res[sd.zero & b <= mean] = b[sd.zero & b <= mean]^2 - res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 - res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean]^2 - } - - return(res) + # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl + do_truncnorm_argchecks(a, b) + + if ((mean == 0) && (sd ==1)){ + #standard normal distribution + if (a == b){ + # point mass + # ⟹ 2nd moment is point-value squared + return(a^2) + } + else if (abs(a) > abs(b)) { + # forces a and b to satisfy b ≥ 0, |a| ≤ |b| + return(my_e2truncnorm(-b, -a)) + } + else if (is.infinite(a) && is.infinite(b)){ + # untruncated normal distribution + # ⟹ 2nd moment is 1 + return(1.0) + } + else if (is.infinite(b)) { + # truncated to [a,∞) + # ⟹ 2nd moment simplifies to 1 + aϕ(a)/(1 - Φ(a)) + m2 = 1 + sqrt(2 / pi) * a / Re(erfcx(a / sqrt(2))) + stopifnot(a^2 <= m2) + return(m2) + } + + #now everything is finite, b ≥ 0, and |a| ≤ |b|, so + # either a ≤ 0 ≤ b or 0 ≤ a ≤ b + stopifnot(a < b, b < Inf, abs(a) <= abs(b)) + stopifnot(((a <= 0) && (0 <= b)) || ((0 <= a) && (a <= b))) + + if ((a <= 0) && (0 <= b)){ + # a ≤ 0 ≤ b + #catestrophic cancellation is less of an issue + ea = sqrt(pi/2) * Re(erf(a / sqrt(2))) + eb = sqrt(pi/2) * Re(erf(b / sqrt(2))) + fa = ea - a * exp(-a^2 / 2) + fb = eb - b * exp(-b^2 / 2) + m2 = (fb - fa) / (eb - ea) + stopifnot(fb >= fa, eb >= ea) + stopifnot(0 <= m2, m2 <= 1) + return(m2) + } + else { + # 0 ≤ a ≤ b + #strategically avoid catestrophic cancellation as much as possible + exdiff = exp((a - b)*(a + b)/2) + ea = sqrt(pi/2) * Re(erfcx(a / sqrt(2))) + eb = sqrt(pi/2) * Re(erfcx(b / sqrt(2))) + fa = ea + a + fb = eb + b + m2 = (fa - fb * exdiff) / (ea - eb * exdiff) + stopifnot(a^2 <= m2, m2 <= b^2) + return(m2) + } + } + else if (sd > 0){ + #nonstandard normal + alpha = (a - mean) / sd + beta = (b - mean) / sd + # TODO potential for catestrophic cancellation here... is there a better way? + return(mean^2 + sd^2 * my_e2truncnorm(alpha, beta) + 2 * mean * sd * my_etruncnorm(alpha, beta)) + } + else { + #point mass + # TODO potential error in original the julia code?? + # i wrote what I think it should be, but that doesn't agree with + # the original code + if ((a <= mean) && (a <= b)) { + # ⟹ if mean ∈ [a,b], 2nd moment is mean^2 + return(mean^2) + } else if (mean < a){ + # ⟹ if mean < a, 2nd moment is a^2 + return(a^2) + } else if (mean > b){ + # ⟹ if mean > a, 2nd moment is b^2 + return(b^2) + } + } } #' @title Variance of Truncated Normal From 873e545ded08f1c854f786b58cfcec214aa318bb Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Mon, 7 Mar 2022 14:45:54 -0600 Subject: [PATCH 04/20] small changes to unvectorized function --- R/truncnorm.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 7bd30e6..bdd4bb9 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -165,7 +165,7 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { return(m2) } else { - # 0 ≤ a ≤ b + # 0 < a ≤ b #strategically avoid catestrophic cancellation as much as possible exdiff = exp((a - b)*(a + b)/2) ea = sqrt(pi/2) * Re(erfcx(a / sqrt(2))) @@ -182,13 +182,10 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { alpha = (a - mean) / sd beta = (b - mean) / sd # TODO potential for catestrophic cancellation here... is there a better way? - return(mean^2 + sd^2 * my_e2truncnorm(alpha, beta) + 2 * mean * sd * my_etruncnorm(alpha, beta)) + return(mean*(mean + 2 * sd * my_etruncnorm(alpha, beta)) + sd^2 * my_e2truncnorm(alpha, beta)) } else { #point mass - # TODO potential error in original the julia code?? - # i wrote what I think it should be, but that doesn't agree with - # the original code if ((a <= mean) && (a <= b)) { # ⟹ if mean ∈ [a,b], 2nd moment is mean^2 return(mean^2) From 83fa8151df23c473264755d09faa15e760d6aef8 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Mon, 7 Mar 2022 14:46:13 -0600 Subject: [PATCH 05/20] draft of vectorized function --- R/truncnorm.R | 117 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 1 deletion(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index bdd4bb9..779b486 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -193,12 +193,127 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { # ⟹ if mean < a, 2nd moment is a^2 return(a^2) } else if (mean > b){ - # ⟹ if mean > a, 2nd moment is b^2 + # ⟹ if mean > b, 2nd moment is b^2 return(b^2) } } } +#' @title Expected Squared Value of Truncated Normal +#' +#' @description Computes the expected squared values of truncated normal +#' distributions with parameters \code{a}, \code{b}, \code{mean}, and +#' \code{sd}. Arguments can be scalars, vectors, or matrices. Arguments of +#' shorter length will be recycled according to the usual recycling rules, +#' but \code{a} and \code{b} must have the same length. Missing values are +#' accepted for all arguments. +#' +#' @inheritParams my_etruncnorm +#' +#' @param sd The standard deviation of the untruncated normal. Standard +#' deviations of zero are interpreted as numerically (rather than exactly) +#' zero, so that the square of the untruncated mean is returned if it lies +#' within \code{[a, b]} and the square of the nearer of \code{a} and +#' \code{b} is returned otherwise. +#' +#' @return The expected squared values of truncated normal +#' distributions with parameters \code{a}, \code{b}, \code{mean}, and +#' \code{sd}. If any of the arguments is a matrix, then a matrix will +#' be returned. +#' +#' @seealso \code{\link{my_etruncnorm}}, \code{\link{my_vtruncnorm}} +#' +#' @export +#' +library(RcppFaddeeva) +my_e2truncnormVECTORIZED = function(a, b, mean = 0, sd = 1) { + # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl + do_truncnorm_argchecks(a, b) + + # initialize array to store result of shape like a + if (is.matrix(a)) + res = array(dim = dim(a)) + else + res = rep(NA, length.out = length(a)) + + #make sure input sizes all match + a = rep(a, length.out = length(res)) + b = rep(b, length.out = length(res)) + mean = rep(mean, length.out = length(res)) + sd = rep(sd, length.out = length(res)) + + # Handle zero sd point masses + sd.zero = (sd == 0) + # if mean ≥ b, 2nd moment is b^2 + res[sd.zero & b <= mean] = b[sd.zero & b <= mean]^2 + # if mean ≤ a, 2nd moment is a^2 + res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 + # if mean ∈ (a,b), 2nd moment is mean^2 + res[sd.zero & a < mean & mean < b] = mean[sd.zero & a < mean & mean < b]^2 + + # Rescale to standard normal distributions if sd is nonzero + alpha = (a[!sd.zero] - mean[!sd.zero]) / sd[!sd.zero] + beta = (b[!sd.zero] - mean[!sd.zero]) / sd[!sd.zero] + scaled.mean = my_etruncnorm(alpha, beta) + # initialize array for scaled 2nd moments + scaled.2mom = rep(NA, length.out = length(alpha)) + + # point mass bc endpoints are equal + # 2nd moment is point-value squared + endpoints_equal = (alpha == beta) + scaled.2mom[endpoints_equal] = alpha[endpoints_equal] ^ 2 + # keep track of which spots in scaled.2mom are already computed + computed = endpoints_equal + + # force to satisfy β ≥ 0 and |α| ≤ |β| + # so either α ≤ 0 ≤ β or 0 < α ≤ β + flip = !computed & abs(alpha) > abs(beta) + flip[is.na(flip)] = FALSE + orig.alpha = alpha + alpha[flip] = -beta[flip] + beta[flip] = -orig.alpha[flip] + + # both endpoints infinite/untruncated normal distribution + # 2nd moment is 1 + both_inf = !computed & is.infinite(alpha) & is.infinite(beta) + scaled.2mom[both_inf] = 1 + computed = computed | both_inf + + # truncated to [α,∞) + # 2nd moment simplifies to 1 + αϕ(α)/(1 - Φ(α)) + beta_inf = !computed & is.infinite(beta) + scaled.2mom[beta_inf] = 1 + sqrt(2 / pi) * alpha[beta_inf] / Re(erfcx(alpha[beta_inf] / sqrt(2))) + computed = computed | beta_inf + + # a ≤ 0 ≤ b + #catestrophic cancellation is less of an issue + alpha_negative = !computed & alpha <= 0 + ea = sqrt(pi/2) * Re(erf(alpha[alpha_negative] / sqrt(2))) + eb = sqrt(pi/2) * Re(erf(beta[alpha_negative] / sqrt(2))) + fa = ea - alpha[alpha_negative] * exp(-alpha[alpha_negative]^2 / 2) + fb = eb - beta[alpha_negative] * exp(-beta[alpha_negative]^2 / 2) + scaled.2mom[alpha_negative] = (fb - fa) / (eb - ea) + computed = computed | alpha_negative + + # 0 < a ≤ b + #strategically avoid catestrophic cancellation as much as possible + exdiff = exp((alpha[!computed] - beta[!computed])*(alpha[!computed] + beta[!computed])/2) + ea = sqrt(pi/2) * Re(erfcx(alpha[!computed] / sqrt(2))) + eb = sqrt(pi/2) * Re(erfcx(beta[!computed] / sqrt(2))) + fa = ea + alpha[!computed] + fb = eb + beta[!computed] + scaled.2mom[!computed] = (fa - fb * exdiff) / (ea - eb * exdiff) + + # transform results back to nonstandard normal case + # μ(μ + 2σ * tn_mean(α, β)) + σ^2 tn_2nd_mom(α, β) + # TODO potential for catestrophic cancellation here... is there a better way? + # multiplying by mean outside is definitely better, but maybe an even better way? + res[!sd.zero] = mean*(mean + 2 * sd * scaled.mean) + res[!sd.zero] = res[!sd.zero] + sd^2 * scaled.2mom + + return(res) +} + #' @title Variance of Truncated Normal #' @description Computes the variance of truncated normal distributions with #' parameters \code{a}, \code{b}, \code{mean}, and \code{sd}. Arguments can From 4467f6b464807f3fae20383e87cf013568ae0ebe Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Mon, 7 Mar 2022 14:48:11 -0600 Subject: [PATCH 06/20] get rid of un-vectorized version --- R/truncnorm.R | 107 -------------------------------------------------- 1 file changed, 107 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 779b486..1b0e378 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -123,113 +123,6 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl do_truncnorm_argchecks(a, b) - if ((mean == 0) && (sd ==1)){ - #standard normal distribution - if (a == b){ - # point mass - # ⟹ 2nd moment is point-value squared - return(a^2) - } - else if (abs(a) > abs(b)) { - # forces a and b to satisfy b ≥ 0, |a| ≤ |b| - return(my_e2truncnorm(-b, -a)) - } - else if (is.infinite(a) && is.infinite(b)){ - # untruncated normal distribution - # ⟹ 2nd moment is 1 - return(1.0) - } - else if (is.infinite(b)) { - # truncated to [a,∞) - # ⟹ 2nd moment simplifies to 1 + aϕ(a)/(1 - Φ(a)) - m2 = 1 + sqrt(2 / pi) * a / Re(erfcx(a / sqrt(2))) - stopifnot(a^2 <= m2) - return(m2) - } - - #now everything is finite, b ≥ 0, and |a| ≤ |b|, so - # either a ≤ 0 ≤ b or 0 ≤ a ≤ b - stopifnot(a < b, b < Inf, abs(a) <= abs(b)) - stopifnot(((a <= 0) && (0 <= b)) || ((0 <= a) && (a <= b))) - - if ((a <= 0) && (0 <= b)){ - # a ≤ 0 ≤ b - #catestrophic cancellation is less of an issue - ea = sqrt(pi/2) * Re(erf(a / sqrt(2))) - eb = sqrt(pi/2) * Re(erf(b / sqrt(2))) - fa = ea - a * exp(-a^2 / 2) - fb = eb - b * exp(-b^2 / 2) - m2 = (fb - fa) / (eb - ea) - stopifnot(fb >= fa, eb >= ea) - stopifnot(0 <= m2, m2 <= 1) - return(m2) - } - else { - # 0 < a ≤ b - #strategically avoid catestrophic cancellation as much as possible - exdiff = exp((a - b)*(a + b)/2) - ea = sqrt(pi/2) * Re(erfcx(a / sqrt(2))) - eb = sqrt(pi/2) * Re(erfcx(b / sqrt(2))) - fa = ea + a - fb = eb + b - m2 = (fa - fb * exdiff) / (ea - eb * exdiff) - stopifnot(a^2 <= m2, m2 <= b^2) - return(m2) - } - } - else if (sd > 0){ - #nonstandard normal - alpha = (a - mean) / sd - beta = (b - mean) / sd - # TODO potential for catestrophic cancellation here... is there a better way? - return(mean*(mean + 2 * sd * my_etruncnorm(alpha, beta)) + sd^2 * my_e2truncnorm(alpha, beta)) - } - else { - #point mass - if ((a <= mean) && (a <= b)) { - # ⟹ if mean ∈ [a,b], 2nd moment is mean^2 - return(mean^2) - } else if (mean < a){ - # ⟹ if mean < a, 2nd moment is a^2 - return(a^2) - } else if (mean > b){ - # ⟹ if mean > b, 2nd moment is b^2 - return(b^2) - } - } -} - -#' @title Expected Squared Value of Truncated Normal -#' -#' @description Computes the expected squared values of truncated normal -#' distributions with parameters \code{a}, \code{b}, \code{mean}, and -#' \code{sd}. Arguments can be scalars, vectors, or matrices. Arguments of -#' shorter length will be recycled according to the usual recycling rules, -#' but \code{a} and \code{b} must have the same length. Missing values are -#' accepted for all arguments. -#' -#' @inheritParams my_etruncnorm -#' -#' @param sd The standard deviation of the untruncated normal. Standard -#' deviations of zero are interpreted as numerically (rather than exactly) -#' zero, so that the square of the untruncated mean is returned if it lies -#' within \code{[a, b]} and the square of the nearer of \code{a} and -#' \code{b} is returned otherwise. -#' -#' @return The expected squared values of truncated normal -#' distributions with parameters \code{a}, \code{b}, \code{mean}, and -#' \code{sd}. If any of the arguments is a matrix, then a matrix will -#' be returned. -#' -#' @seealso \code{\link{my_etruncnorm}}, \code{\link{my_vtruncnorm}} -#' -#' @export -#' -library(RcppFaddeeva) -my_e2truncnormVECTORIZED = function(a, b, mean = 0, sd = 1) { - # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl - do_truncnorm_argchecks(a, b) - # initialize array to store result of shape like a if (is.matrix(a)) res = array(dim = dim(a)) From af213314a7e5027e68a6692a81c08fa3767adb35 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Mon, 7 Mar 2022 16:23:13 -0600 Subject: [PATCH 07/20] bug fix --- R/truncnorm.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 1b0e378..fefa9ac 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -143,10 +143,16 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 # if mean ∈ (a,b), 2nd moment is mean^2 res[sd.zero & a < mean & mean < b] = mean[sd.zero & a < mean & mean < b]^2 + + # Focus in on where sd is nonzero + a = a[!sd.zero] + b = b[!sd.zero] + mean = mean[!sd.zero] + sd = sd[!sd.zero] # Rescale to standard normal distributions if sd is nonzero - alpha = (a[!sd.zero] - mean[!sd.zero]) / sd[!sd.zero] - beta = (b[!sd.zero] - mean[!sd.zero]) / sd[!sd.zero] + alpha = (a - mean) / sd + beta = (b - mean) / sd scaled.mean = my_etruncnorm(alpha, beta) # initialize array for scaled 2nd moments scaled.2mom = rep(NA, length.out = length(alpha)) From 7ced6b5c4f4687e59a3759e584d562abccb01011 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Mon, 7 Mar 2022 16:33:36 -0600 Subject: [PATCH 08/20] 2 space tabs --- R/truncnorm.R | 174 +++++++++++++++++++++++++++----------------------- 1 file changed, 94 insertions(+), 80 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index fefa9ac..de12906 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -120,97 +120,111 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { #' library(RcppFaddeeva) my_e2truncnorm = function(a, b, mean = 0, sd = 1) { - # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl - do_truncnorm_argchecks(a, b) + # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl + do_truncnorm_argchecks(a, b) - # initialize array to store result of shape like a - if (is.matrix(a)) - res = array(dim = dim(a)) - else - res = rep(NA, length.out = length(a)) + # initialize array to store result of shape like a + if (is.matrix(a)) + res = array(dim = dim(a)) + else + res = rep(NA, length.out = length(a)) - #make sure input sizes all match - a = rep(a, length.out = length(res)) - b = rep(b, length.out = length(res)) - mean = rep(mean, length.out = length(res)) - sd = rep(sd, length.out = length(res)) + #make sure input sizes all match + a = rep(a, length.out = length(res)) + b = rep(b, length.out = length(res)) + mean = rep(mean, length.out = length(res)) + sd = rep(sd, length.out = length(res)) - # Handle zero sd point masses - sd.zero = (sd == 0) - # if mean ≥ b, 2nd moment is b^2 - res[sd.zero & b <= mean] = b[sd.zero & b <= mean]^2 - # if mean ≤ a, 2nd moment is a^2 - res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 - # if mean ∈ (a,b), 2nd moment is mean^2 - res[sd.zero & a < mean & mean < b] = mean[sd.zero & a < mean & mean < b]^2 - - # Focus in on where sd is nonzero - a = a[!sd.zero] - b = b[!sd.zero] - mean = mean[!sd.zero] - sd = sd[!sd.zero] + # Handle zero sd point masses + sd.zero = (sd == 0) + # if mean ≥ b, 2nd moment is b^2 + res[sd.zero & b <= mean] = b[sd.zero & b <= mean]^2 + # if mean ≤ a, 2nd moment is a^2 + res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 + # if mean ∈ (a,b), 2nd moment is mean^2 + res[sd.zero & a < mean & mean < b] = mean[sd.zero & a < mean & mean < b]^2 - # Rescale to standard normal distributions if sd is nonzero - alpha = (a - mean) / sd - beta = (b - mean) / sd - scaled.mean = my_etruncnorm(alpha, beta) - # initialize array for scaled 2nd moments - scaled.2mom = rep(NA, length.out = length(alpha)) + # Focus in on where sd is nonzero + a = a[!sd.zero] + b = b[!sd.zero] + mean = mean[!sd.zero] + sd = sd[!sd.zero] - # point mass bc endpoints are equal - # 2nd moment is point-value squared - endpoints_equal = (alpha == beta) - scaled.2mom[endpoints_equal] = alpha[endpoints_equal] ^ 2 - # keep track of which spots in scaled.2mom are already computed - computed = endpoints_equal + # Rescale to standard normal distributions if sd is nonzero + alpha = (a - mean) / sd + beta = (b - mean) / sd + scaled.mean = my_etruncnorm(alpha, beta) + # initialize array for scaled 2nd moments + scaled.2mom = rep(NA, length.out = length(alpha)) - # force to satisfy β ≥ 0 and |α| ≤ |β| - # so either α ≤ 0 ≤ β or 0 < α ≤ β - flip = !computed & abs(alpha) > abs(beta) - flip[is.na(flip)] = FALSE - orig.alpha = alpha - alpha[flip] = -beta[flip] - beta[flip] = -orig.alpha[flip] + # point mass bc endpoints are equal + # 2nd moment is point-value squared + endpoints_equal = (alpha == beta) + scaled.2mom[endpoints_equal] = alpha[endpoints_equal] ^ 2 + # keep track of which spots in scaled.2mom are already computed + computed = endpoints_equal - # both endpoints infinite/untruncated normal distribution - # 2nd moment is 1 - both_inf = !computed & is.infinite(alpha) & is.infinite(beta) - scaled.2mom[both_inf] = 1 - computed = computed | both_inf + # force to satisfy β ≥ 0 and |α| ≤ |β| + # so either α ≤ 0 ≤ β or 0 < α ≤ β + flip = !computed & abs(alpha) > abs(beta) + flip[is.na(flip)] = FALSE + orig.alpha = alpha + alpha[flip] = -beta[flip] + beta[flip] = -orig.alpha[flip] - # truncated to [α,∞) - # 2nd moment simplifies to 1 + αϕ(α)/(1 - Φ(α)) - beta_inf = !computed & is.infinite(beta) - scaled.2mom[beta_inf] = 1 + sqrt(2 / pi) * alpha[beta_inf] / Re(erfcx(alpha[beta_inf] / sqrt(2))) - computed = computed | beta_inf + # both endpoints infinite/untruncated normal distribution + # 2nd moment is 1 + both_inf = !computed & is.infinite(alpha) & is.infinite(beta) + scaled.2mom[both_inf] = 1 + computed = computed | both_inf - # a ≤ 0 ≤ b - #catestrophic cancellation is less of an issue - alpha_negative = !computed & alpha <= 0 - ea = sqrt(pi/2) * Re(erf(alpha[alpha_negative] / sqrt(2))) - eb = sqrt(pi/2) * Re(erf(beta[alpha_negative] / sqrt(2))) - fa = ea - alpha[alpha_negative] * exp(-alpha[alpha_negative]^2 / 2) - fb = eb - beta[alpha_negative] * exp(-beta[alpha_negative]^2 / 2) - scaled.2mom[alpha_negative] = (fb - fa) / (eb - ea) - computed = computed | alpha_negative - - # 0 < a ≤ b - #strategically avoid catestrophic cancellation as much as possible - exdiff = exp((alpha[!computed] - beta[!computed])*(alpha[!computed] + beta[!computed])/2) - ea = sqrt(pi/2) * Re(erfcx(alpha[!computed] / sqrt(2))) - eb = sqrt(pi/2) * Re(erfcx(beta[!computed] / sqrt(2))) - fa = ea + alpha[!computed] - fb = eb + beta[!computed] - scaled.2mom[!computed] = (fa - fb * exdiff) / (ea - eb * exdiff) + # truncated to [α,∞) + # 2nd moment simplifies to 1 + αϕ(α)/(1 - Φ(α)) + beta_inf = !computed & is.infinite(beta) + scaled.2mom[beta_inf] = 1 + sqrt(2 / pi) * alpha[beta_inf] / Re(erfcx(alpha[beta_inf] / sqrt(2))) + computed = computed | beta_inf - # transform results back to nonstandard normal case - # μ(μ + 2σ * tn_mean(α, β)) + σ^2 tn_2nd_mom(α, β) - # TODO potential for catestrophic cancellation here... is there a better way? - # multiplying by mean outside is definitely better, but maybe an even better way? - res[!sd.zero] = mean*(mean + 2 * sd * scaled.mean) - res[!sd.zero] = res[!sd.zero] + sd^2 * scaled.2mom + # a ≤ 0 ≤ b + #catestrophic cancellation is less of an issue + alpha_negative = !computed & alpha <= 0 + ea = sqrt(pi/2) * Re(erf(alpha[alpha_negative] / sqrt(2))) + eb = sqrt(pi/2) * Re(erf(beta[alpha_negative] / sqrt(2))) + fa = ea - alpha[alpha_negative] * exp(-alpha[alpha_negative]^2 / 2) + fb = eb - beta[alpha_negative] * exp(-beta[alpha_negative]^2 / 2) + scaled.2mom[alpha_negative] = (fb - fa) / (eb - ea) + computed = computed | alpha_negative - return(res) + # 0 < a ≤ b + #strategically avoid catestrophic cancellation as much as possible + exdiff = exp((alpha[!computed] - beta[!computed])*(alpha[!computed] + beta[!computed])/2) + ea = sqrt(pi/2) * Re(erfcx(alpha[!computed] / sqrt(2))) + eb = sqrt(pi/2) * Re(erfcx(beta[!computed] / sqrt(2))) + fa = ea + alpha[!computed] + fb = eb + beta[!computed] + scaled.2mom[!computed] = (fa - fb * exdiff) / (ea - eb * exdiff) + + # transform results back to nonstandard normal case + # μ(μ + 2σ * tn_mean(α, β)) + σ^2 tn_2nd_mom(α, β) + # TODO potential for catestrophic cancellation here... is there a better way? + # multiplying by mean outside is definitely better, but maybe an even better way? + res[!sd.zero] = mean*(mean + 2 * sd * scaled.mean) + res[!sd.zero] = res[!sd.zero] + sd^2 * scaled.2mom + + # TODO ADD IN ERROR CHECKING + # Check that the results make sense. When beta is negative, + # # beta^2 + 2 * (1 + 1 / beta^2) is an upper bound for the expected squared + # # value, and it is typically a good approximation as beta goes to -Inf. + # # When the endpoints are very close to one another, the expected squared + # # value of the uniform distribution on [alpha, beta] is a better upper + # # bound (and approximation). + # upper.bd1 = beta^2 + 2 * (1 + 1 / beta^2) + # upper.bd2 = (alpha^2 + alpha * beta + beta^2) / 3 + # upper.bd = pmin(upper.bd1, upper.bd2) + # bad.idx = (!is.na(beta) & beta < 0 + # & (scaled.res < beta^2 | scaled.res > upper.bd)) + # scaled.res[bad.idx] = upper.bd[bad.idx] + + return(res) } #' @title Variance of Truncated Normal From a8e3f1ac171fd2b90efc5eebc6d72fdf647b9567 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Thu, 24 Mar 2022 18:09:12 -0500 Subject: [PATCH 09/20] add changes to other truncnorm functions --- R/truncnorm.R | 187 +++++++++++++++++++++++++------------------------- 1 file changed, 92 insertions(+), 95 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index de12906..bbe1310 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -29,65 +29,90 @@ #' @seealso \code{\link{my_e2truncnorm}}, \code{\link{my_vtruncnorm}} #' #' @export -#' my_etruncnorm = function(a, b, mean = 0, sd = 1) { + # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) - # The case where some sds are zero is handled last. In the meantime, assume - # that sd > 0. + # initialize array to store result of shape like a + if (is.matrix(a)){ + res = array(dim = dim(a)) + } + else{ + res = rep(NA, length.out = length(a)) + } + + #make sure input sizes all match + a = rep(a, length.out = length(res)) + b = rep(b, length.out = length(res)) + mean = rep(mean, length.out = length(res)) + sd = rep(sd, length.out = length(res)) + + # Handle zero sds. Return the mean of the untruncated normal when it is + # located inside of the interval [alpha, beta]. Otherwise, return the + # endpoint that is closer to the mean. + sd.zero = (sd == 0) + res[sd.zero & b <= mean] = b[sd.zero & b <= mean] + res[sd.zero & a >= mean] = a[sd.zero & a >= mean] + res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean] + + # Focus in on where sd is nonzero + a = a[!sd.zero] + b = b[!sd.zero] + mean = mean[!sd.zero] + sd = sd[!sd.zero] + + # Rescale to standard normal distributions alpha = (a - mean) / sd beta = (b - mean) / sd + # initialize array for scaled 2nd moments + scaled.mean = rep(NA, length.out = length(alpha)) + + # point mass bc endpoints are equal + # 1st moment is point-value + endpoints_equal = (alpha == beta) + scaled.mean[endpoints_equal] = alpha[endpoints_equal] + # keep track of which spots in scaled.2mom are already computed + computed = endpoints_equal - # Flip alpha and beta when: 1. Both are positive (since computations are - # unstable when both values of pnorm are close to 1); 2. dnorm(alpha) is - # greater than dnorm(beta) (since subtraction is done on the log scale). - flip = (alpha > 0 & beta > 0) | (beta > abs(alpha)) + # force to satisfy β ≥ 0 and |α| ≤ |β| + # so either α ≤ 0 ≤ β or 0 < α ≤ β + flip = !computed & abs(alpha) > abs(beta) flip[is.na(flip)] = FALSE orig.alpha = alpha alpha[flip] = -beta[flip] beta[flip] = -orig.alpha[flip] - dnorm.diff = logscale_sub(dnorm(beta, log = TRUE), dnorm(alpha, log = TRUE)) - pnorm.diff = logscale_sub(pnorm(beta, log.p = TRUE), pnorm(alpha, log.p = TRUE)) - scaled.res = -exp(dnorm.diff - pnorm.diff) + # both endpoints infinite/untruncated normal distribution + # scaled mean is 0 + both_inf = !computed & is.infinite(alpha) & is.infinite(beta) + scaled.mean[both_inf] = 0 + computed = computed | both_inf - # Handle the division by zero that occurs when pnorm.diff = -Inf (that is, - # when endpoints are approximately equal). - endpts.equal = is.infinite(pnorm.diff) - scaled.res[endpts.equal] = (alpha[endpts.equal] + beta[endpts.equal]) / 2 + # a ≤ 0 ≤ b + #catestrophic cancellation is less of an issue + alpha_negative = !computed & alpha <= 0 + diff = (alpha[alpha_negative] - beta[alpha_negative]) * (alpha[alpha_negative] + beta[alpha_negative]) / 2 + #√(2/π) * expm1(-Δ) * exp(-α^2 / 2) / erf(β/√2, α/√2) + scaled.mean[alpha_negative] = sqrt(2/pi) * expm1(-diff) * exp(-alpha[alpha_negative]^2 / 2) + denom = Re(erf(alpha[alpha_negative]/√2)) - Re(erf(beta[alpha_negative]/√2)) + scaled.mean[alpha_negative] = scaled.mean[alpha_negative] / denom + computed = computed | alpha_negative + + # 0 < a < b + #strategically avoid catestrophic cancellation as much as possible + diff = (alpha[!computed] - beta[!computed]) * (alpha[!computed] + beta[!computed]) / 2 + denom = exp(-diff) * Re(erfcx(beta[!computed] / sqrt(2))) - Re(erfcx(alpha[!computed] / sqrt(2))) + scaled.mean[!computed] = sqrt(2/pi) * expm1(-diff) / denom - # When alpha and beta are very large and both negative (due to the flipping - # logic, they cannot both be positive), computations can become unstable. - # We find such cases by checking that the expectations make sense. When - # beta is negative, beta + 1 / beta is a lower bound for the expectation. - # Further, it is an increasingly good approximation as beta goes to -Inf - # as long as alpha and beta aren't too close to one another. If they are, - # then their midpoint can be used as an alternate approximation (and lower - # bound). - lower.bd = pmax(beta + 1 / beta, (alpha + beta) / 2) - bad.idx = (!is.na(beta) & beta < 0 - & (scaled.res < lower.bd | scaled.res > beta)) - scaled.res[bad.idx] = lower.bd[bad.idx] + #double check that things are within bounds + scaled.mean[scaled.mean > beta] = beta[scaled.mean > beta] + scaled.mean[scaled.mean < alpha] = alpha[scaled.mean < alpha] # Flip back. scaled.res[flip] = -scaled.res[flip] - res = mean + sd * scaled.res - - # Handle zero sds. Return the mean of the untruncated normal when it is - # located inside of the interval [alpha, beta]. Otherwise, return the - # endpoint that is closer to the mean. - if (any(sd == 0)) { - # For the subsetting to work correctly, arguments need to be recycled. - a = rep(a, length.out = length(res)) - b = rep(b, length.out = length(res)) - mean = rep(mean, length.out = length(res)) - - sd.zero = (sd == 0) - res[sd.zero & b <= mean] = b[sd.zero & b <= mean] - res[sd.zero & a >= mean] = a[sd.zero & a >= mean] - res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean] - } + #transform back to nonstandard normal case + res[!sd.zero] = mean + sd * scaled.res return(res) } @@ -120,14 +145,16 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { #' library(RcppFaddeeva) my_e2truncnorm = function(a, b, mean = 0, sd = 1) { - # based off of https://github.com/cossio/TruncatedNormal.jl/blob/3c16866c3afa3920e787513d492689e9e81192ca/src/tnmom2.jl + # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) # initialize array to store result of shape like a - if (is.matrix(a)) + if (is.matrix(a)){ res = array(dim = dim(a)) - else + } + else{ res = rep(NA, length.out = length(a)) + } #make sure input sizes all match a = rep(a, length.out = length(res)) @@ -204,25 +231,16 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { scaled.2mom[!computed] = (fa - fb * exdiff) / (ea - eb * exdiff) # transform results back to nonstandard normal case - # μ(μ + 2σ * tn_mean(α, β)) + σ^2 tn_2nd_mom(α, β) - # TODO potential for catestrophic cancellation here... is there a better way? - # multiplying by mean outside is definitely better, but maybe an even better way? - res[!sd.zero] = mean*(mean + 2 * sd * scaled.mean) - res[!sd.zero] = res[!sd.zero] + sd^2 * scaled.2mom + # μ^2 + σ^2 E(Z^2) + 2 μ σ E(Z) + # possible catestropic cancellation because μ^2 + σ^2 E(Z^2) ≧ 0 while 2μσE(Z) has unknown sign + # If |μ| < σ , compute as μ^2 + σ(σ E(Z^2) + 2 μ E(Z)) + mean_smaller = abs(mean) < sd + res[!sd.zero & mean_smaller] = mean^2 + sd*(sd * scaled.2mom + 2 * mean * scaled.mean) + # If σ ≦ |μ| , compute as μ(μ + 2 σ E(Z)) + σ^2 E(Z^2) + res[!sd.zero & !mean_smaller] = mean*(mean + 2 * sd * scaled.mean) + sd^2 * scaled.2mom - # TODO ADD IN ERROR CHECKING - # Check that the results make sense. When beta is negative, - # # beta^2 + 2 * (1 + 1 / beta^2) is an upper bound for the expected squared - # # value, and it is typically a good approximation as beta goes to -Inf. - # # When the endpoints are very close to one another, the expected squared - # # value of the uniform distribution on [alpha, beta] is a better upper - # # bound (and approximation). - # upper.bd1 = beta^2 + 2 * (1 + 1 / beta^2) - # upper.bd2 = (alpha^2 + alpha * beta + beta^2) / 3 - # upper.bd = pmin(upper.bd1, upper.bd2) - # bad.idx = (!is.na(beta) & beta < 0 - # & (scaled.res < beta^2 | scaled.res > upper.bd)) - # scaled.res[bad.idx] = upper.bd[bad.idx] + # Check that the results make sense -- should never be negative + stopifnot(all(res >= 0)) return(res) } @@ -244,31 +262,27 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { #' @seealso \code{\link{my_etruncnorm}}, \code{\link{my_e2truncnorm}} #' #' @export -#' my_vtruncnorm = function(a, b, mean = 0, sd = 1) { + # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) + #solved scaled problem alpha = (a - mean) / sd beta = (b - mean) / sd - scaled.res = my_e2truncnorm(alpha, beta) - my_etruncnorm(alpha, beta)^2 - - # When alpha and beta are large and share the same sign, this computation - # becomes unstable. A good approximation in this regime is 1 / beta^2 - # (when alpha and beta are both negative). If my_e2truncnorm and - # my_etruncnorm are accurate to the eighth digit, then we can only trust - # results up to the second digit if beta^2 and 1 / beta^2 differ by an - # order of magnitude no more than 6. - smaller.endpt = pmin(abs(alpha), abs(beta)) - bad.idx = (is.finite(smaller.endpt) & smaller.endpt > 30) - scaled.res[bad.idx] = pmin(1 / smaller.endpt[bad.idx]^2, - (beta[bad.idx] - alpha[bad.idx])^2 / 12) + m1 = my_etruncnorm(alpha, beta) + m2 = sqrt(my_e2truncnorm(alpha, beta)) + scaled.res = (m2 - m1) * (m1 + m2) - # Handle zero sds. - scaled.res[is.nan(scaled.res)] = 0 + # Handle endpoints equal + scaled.res[alpha == beta] = alpha[alpha == beta] + #transform back to unscaled res = sd^2 * scaled.res + # Handle zero sds. + res[sd == 0] = 0 + return(res) } @@ -278,20 +292,3 @@ do_truncnorm_argchecks = function(a, b) { if (any(b < a, na.rm = TRUE)) stop("truncnorm functions require that a <= b.") } - -logscale_sub = function(logx, logy) { - # In rare cases, logx can become numerically less than logy. When this - # occurs, logx is adjusted and a warning is issued. - diff = logx - logy - if (any(diff < 0, na.rm = TRUE)) { - bad.idx = (diff < 0) - bad.idx[is.na(bad.idx)] = FALSE - logx[bad.idx] = logy[bad.idx] - warning("logscale_sub encountered negative value(s) of logx - logy (min: ", - formatC(min(diff[bad.idx]), format = "e", digits = 2), ")") - } - - scale.by = logx - scale.by[is.infinite(scale.by)] = 0 - return(log(exp(logx - scale.by) - exp(logy - scale.by)) + scale.by) -} From 5b0c42fcbc3cdbd75b025c7c186ae35a44ffcff6 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Thu, 24 Mar 2022 18:27:25 -0500 Subject: [PATCH 10/20] small bugs --- R/truncnorm.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index bbe1310..d501132 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -94,7 +94,7 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { diff = (alpha[alpha_negative] - beta[alpha_negative]) * (alpha[alpha_negative] + beta[alpha_negative]) / 2 #√(2/π) * expm1(-Δ) * exp(-α^2 / 2) / erf(β/√2, α/√2) scaled.mean[alpha_negative] = sqrt(2/pi) * expm1(-diff) * exp(-alpha[alpha_negative]^2 / 2) - denom = Re(erf(alpha[alpha_negative]/√2)) - Re(erf(beta[alpha_negative]/√2)) + denom = Re(erf(alpha[alpha_negative] / sqrt(2))) - Re(erf(beta[alpha_negative] / sqrt(2))) scaled.mean[alpha_negative] = scaled.mean[alpha_negative] / denom computed = computed | alpha_negative @@ -109,10 +109,10 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { scaled.mean[scaled.mean < alpha] = alpha[scaled.mean < alpha] # Flip back. - scaled.res[flip] = -scaled.res[flip] + scaled.mean[flip] = -scaled.mean[flip] #transform back to nonstandard normal case - res[!sd.zero] = mean + sd * scaled.res + res[!sd.zero] = mean + sd * scaled.mean return(res) } From 6bc1ba1f31095b7e6d1ae9858ee3879985a0bf5a Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Thu, 24 Mar 2022 19:40:26 -0500 Subject: [PATCH 11/20] add bug for one-sided truncations --- R/truncnorm.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/truncnorm.R b/R/truncnorm.R index d501132..43c6979 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -88,6 +88,12 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { scaled.mean[both_inf] = 0 computed = computed | both_inf + # truncated to [α,∞) + # 2nd moment simplifies to ϕ(α)/(1 - Φ(α)) + beta_inf = !computed & is.infinite(beta) + scaled.mean[beta_inf] = sqrt(2/pi) / Re(erfcx(alpha[beta_inf] / sqrt(2))) + computed = computed | beta_inf + # a ≤ 0 ≤ b #catestrophic cancellation is less of an issue alpha_negative = !computed & alpha <= 0 From 13575c012b3d8939c95f6d7f6d58a4ea98faa28c Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Thu, 24 Mar 2022 20:08:49 -0500 Subject: [PATCH 12/20] fix shapes of things, bug in negating an intermediate value --- R/truncnorm.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 43c6979..1e18336 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -33,7 +33,9 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) - # initialize array to store result of shape like a + # force a to have the correct shape + a = a + 0 * mean + 0 * sd + # initialize array to store result if (is.matrix(a)){ res = array(dim = dim(a)) } @@ -97,7 +99,7 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { # a ≤ 0 ≤ b #catestrophic cancellation is less of an issue alpha_negative = !computed & alpha <= 0 - diff = (alpha[alpha_negative] - beta[alpha_negative]) * (alpha[alpha_negative] + beta[alpha_negative]) / 2 + diff = (beta[alpha_negative] - alpha[alpha_negative]) * (alpha[alpha_negative] + beta[alpha_negative]) / 2 #√(2/π) * expm1(-Δ) * exp(-α^2 / 2) / erf(β/√2, α/√2) scaled.mean[alpha_negative] = sqrt(2/pi) * expm1(-diff) * exp(-alpha[alpha_negative]^2 / 2) denom = Re(erf(alpha[alpha_negative] / sqrt(2))) - Re(erf(beta[alpha_negative] / sqrt(2))) @@ -106,7 +108,7 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { # 0 < a < b #strategically avoid catestrophic cancellation as much as possible - diff = (alpha[!computed] - beta[!computed]) * (alpha[!computed] + beta[!computed]) / 2 + diff = (beta[!computed] - alpha[!computed]) * (alpha[!computed] + beta[!computed]) / 2 denom = exp(-diff) * Re(erfcx(beta[!computed] / sqrt(2))) - Re(erfcx(alpha[!computed] / sqrt(2))) scaled.mean[!computed] = sqrt(2/pi) * expm1(-diff) / denom @@ -154,7 +156,9 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) - # initialize array to store result of shape like a + # force a to have the correct shape + a = a + 0 * mean + 0 * sd + # initialize array to store result if (is.matrix(a)){ res = array(dim = dim(a)) } From bf93b8c84f2d3c403232d4bdf5986ff04f8788b5 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Thu, 24 Mar 2022 20:22:08 -0500 Subject: [PATCH 13/20] handle nans in inputs --- R/truncnorm.R | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 1e18336..a6e5b83 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -57,11 +57,15 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { res[sd.zero & a >= mean] = a[sd.zero & a >= mean] res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean] - # Focus in on where sd is nonzero - a = a[!sd.zero] - b = b[!sd.zero] - mean = mean[!sd.zero] - sd = sd[!sd.zero] + # Handle NAN inputs + isna = is.na(a) | is.na(b) | is.na(mean) | is.na(sd) + res[isna] = NA + + # Focus in on where sd is nonzero and nothing is nan + a = a[!sd.zero & !isna] + b = b[!sd.zero & !isna] + mean = mean[!sd.zero & !isna] + sd = sd[!sd.zero & !isna] # Rescale to standard normal distributions alpha = (a - mean) / sd @@ -180,12 +184,16 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 # if mean ∈ (a,b), 2nd moment is mean^2 res[sd.zero & a < mean & mean < b] = mean[sd.zero & a < mean & mean < b]^2 - - # Focus in on where sd is nonzero - a = a[!sd.zero] - b = b[!sd.zero] - mean = mean[!sd.zero] - sd = sd[!sd.zero] + + # Handle NAN inputs + isna = is.na(a) | is.na(b) | is.na(mean) | is.na(sd) + res[isna] = NA + + # Focus in on where sd is nonzero and nothing is nan + a = a[!sd.zero & !isna] + b = b[!sd.zero & !isna] + mean = mean[!sd.zero & !isna] + sd = sd[!sd.zero & !isna] # Rescale to standard normal distributions if sd is nonzero alpha = (a - mean) / sd @@ -250,7 +258,7 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { res[!sd.zero & !mean_smaller] = mean*(mean + 2 * sd * scaled.mean) + sd^2 * scaled.2mom # Check that the results make sense -- should never be negative - stopifnot(all(res >= 0)) + stopifnot(all(res >= 0 | is.na(res))) return(res) } @@ -285,7 +293,7 @@ my_vtruncnorm = function(a, b, mean = 0, sd = 1) { scaled.res = (m2 - m1) * (m1 + m2) # Handle endpoints equal - scaled.res[alpha == beta] = alpha[alpha == beta] + scaled.res[(alpha == beta) & sd != 0] = alpha[(alpha == beta) & sd != 0] #transform back to unscaled res = sd^2 * scaled.res From 5fbb405bd1c4612c181fb1fe748615bf2efe6cee Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Thu, 24 Mar 2022 20:25:49 -0500 Subject: [PATCH 14/20] improve the way you get the shapes --- R/truncnorm.R | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index a6e5b83..907a7a3 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -33,14 +33,12 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) - # force a to have the correct shape - a = a + 0 * mean + 0 * sd # initialize array to store result if (is.matrix(a)){ - res = array(dim = dim(a)) + res = array(dim = dim(0 * (a + b + mean + sd))) } else{ - res = rep(NA, length.out = length(a)) + res = rep(NA, length.out = length(0 * (a + b + mean + sd))) } #make sure input sizes all match @@ -160,14 +158,12 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) - # force a to have the correct shape - a = a + 0 * mean + 0 * sd # initialize array to store result if (is.matrix(a)){ - res = array(dim = dim(a)) + res = array(dim = dim(0 * (a + b + mean + sd))) } else{ - res = rep(NA, length.out = length(a)) + res = rep(NA, length.out = length(0 * (a + b + mean + sd))) } #make sure input sizes all match From f45ceda934dd44661976e55d0ac905599d2d9f8a Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Thu, 24 Mar 2022 21:17:19 -0500 Subject: [PATCH 15/20] handle nans in inputs and improve handling of weird shapes --- R/truncnorm.R | 58 ++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 1e18336..486c1ed 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -33,14 +33,13 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) - # force a to have the correct shape - a = a + 0 * mean + 0 * sd # initialize array to store result - if (is.matrix(a)){ - res = array(dim = dim(a)) + common_shape = 0 * (a + b + mean + sd) + if (is.matrix(common_shape)){ + res = array(dim = dim(common_shape)) } else{ - res = rep(NA, length.out = length(a)) + res = rep(NA, length.out = length(common_shape)) } #make sure input sizes all match @@ -57,11 +56,15 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { res[sd.zero & a >= mean] = a[sd.zero & a >= mean] res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean] - # Focus in on where sd is nonzero - a = a[!sd.zero] - b = b[!sd.zero] - mean = mean[!sd.zero] - sd = sd[!sd.zero] + # Handle NAN inputs + isna = is.na(a) | is.na(b) | is.na(mean) | is.na(sd) + res[isna] = NA + + # Focus in on where sd is nonzero and nothing is nan + a = a[!sd.zero & !isna] + b = b[!sd.zero & !isna] + mean = mean[!sd.zero & !isna] + sd = sd[!sd.zero & !isna] # Rescale to standard normal distributions alpha = (a - mean) / sd @@ -120,7 +123,7 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { scaled.mean[flip] = -scaled.mean[flip] #transform back to nonstandard normal case - res[!sd.zero] = mean + sd * scaled.mean + res[!sd.zero & !isna] = mean + sd * scaled.mean return(res) } @@ -156,14 +159,13 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { # based off of https://github.com/cossio/TruncatedNormal.jl do_truncnorm_argchecks(a, b) - # force a to have the correct shape - a = a + 0 * mean + 0 * sd # initialize array to store result - if (is.matrix(a)){ - res = array(dim = dim(a)) + common_shape = 0 * (a + b + mean + sd) + if (is.matrix(common_shape)){ + res = array(dim = dim(common_shape)) } else{ - res = rep(NA, length.out = length(a)) + res = rep(NA, length.out = length(common_shape)) } #make sure input sizes all match @@ -180,12 +182,16 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 # if mean ∈ (a,b), 2nd moment is mean^2 res[sd.zero & a < mean & mean < b] = mean[sd.zero & a < mean & mean < b]^2 - - # Focus in on where sd is nonzero - a = a[!sd.zero] - b = b[!sd.zero] - mean = mean[!sd.zero] - sd = sd[!sd.zero] + + # Handle NAN inputs + isna = is.na(a) | is.na(b) | is.na(mean) | is.na(sd) + res[isna] = NA + + # Focus in on where sd is nonzero and nothing is nan + a = a[!sd.zero & !isna] + b = b[!sd.zero & !isna] + mean = mean[!sd.zero & !isna] + sd = sd[!sd.zero & !isna] # Rescale to standard normal distributions if sd is nonzero alpha = (a - mean) / sd @@ -245,12 +251,12 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { # possible catestropic cancellation because μ^2 + σ^2 E(Z^2) ≧ 0 while 2μσE(Z) has unknown sign # If |μ| < σ , compute as μ^2 + σ(σ E(Z^2) + 2 μ E(Z)) mean_smaller = abs(mean) < sd - res[!sd.zero & mean_smaller] = mean^2 + sd*(sd * scaled.2mom + 2 * mean * scaled.mean) + res[!sd.zero & !isna & mean_smaller] = mean^2 + sd*(sd * scaled.2mom + 2 * mean * scaled.mean) # If σ ≦ |μ| , compute as μ(μ + 2 σ E(Z)) + σ^2 E(Z^2) - res[!sd.zero & !mean_smaller] = mean*(mean + 2 * sd * scaled.mean) + sd^2 * scaled.2mom + res[!sd.zero & !isna & !mean_smaller] = mean*(mean + 2 * sd * scaled.mean) + sd^2 * scaled.2mom # Check that the results make sense -- should never be negative - stopifnot(all(res >= 0)) + stopifnot(all(res >= 0 | is.na(res))) return(res) } @@ -285,7 +291,7 @@ my_vtruncnorm = function(a, b, mean = 0, sd = 1) { scaled.res = (m2 - m1) * (m1 + m2) # Handle endpoints equal - scaled.res[alpha == beta] = alpha[alpha == beta] + scaled.res[(alpha == beta) & sd != 0] = alpha[(alpha == beta) & sd != 0] #transform back to unscaled res = sd^2 * scaled.res From ee72aff7c3af3e5ff2e7f7eabeb4c79b208fecf8 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Thu, 24 Mar 2022 21:56:49 -0500 Subject: [PATCH 16/20] issues with nans --- R/truncnorm.R | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 486c1ed..7717d72 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -48,17 +48,17 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { mean = rep(mean, length.out = length(res)) sd = rep(sd, length.out = length(res)) + # Handle NAN inputs + isna = is.na(a) | is.na(b) | is.na(mean) | is.na(sd) + res[isna] = NA + # Handle zero sds. Return the mean of the untruncated normal when it is # located inside of the interval [alpha, beta]. Otherwise, return the # endpoint that is closer to the mean. sd.zero = (sd == 0) - res[sd.zero & b <= mean] = b[sd.zero & b <= mean] - res[sd.zero & a >= mean] = a[sd.zero & a >= mean] - res[sd.zero & a < mean & b > mean] = mean[sd.zero & a < mean & b > mean] - - # Handle NAN inputs - isna = is.na(a) | is.na(b) | is.na(mean) | is.na(sd) - res[isna] = NA + res[!isna & sd.zero & b <= mean] = b[!isna & sd.zero & b <= mean] + res[!isna & sd.zero & a >= mean] = a[!isna & sd.zero & a >= mean] + res[!isna & sd.zero & a < mean & b > mean] = mean[!isna & sd.zero & a < mean & b > mean] # Focus in on where sd is nonzero and nothing is nan a = a[!sd.zero & !isna] @@ -174,19 +174,18 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { mean = rep(mean, length.out = length(res)) sd = rep(sd, length.out = length(res)) - # Handle zero sd point masses - sd.zero = (sd == 0) - # if mean ≥ b, 2nd moment is b^2 - res[sd.zero & b <= mean] = b[sd.zero & b <= mean]^2 - # if mean ≤ a, 2nd moment is a^2 - res[sd.zero & a >= mean] = a[sd.zero & a >= mean]^2 - # if mean ∈ (a,b), 2nd moment is mean^2 - res[sd.zero & a < mean & mean < b] = mean[sd.zero & a < mean & mean < b]^2 - # Handle NAN inputs isna = is.na(a) | is.na(b) | is.na(mean) | is.na(sd) res[isna] = NA + # Handle zero sds. Return the mean of the untruncated normal when it is + # located inside of the interval [alpha, beta]. Otherwise, return the + # endpoint that is closer to the mean. + sd.zero = (sd == 0) + res[!isna & sd.zero & b <= mean] = b[!isna & sd.zero & b <= mean]^2 + res[!isna & sd.zero & a >= mean] = a[!isna & sd.zero & a >= mean]^2 + res[!isna & sd.zero & a < mean & b > mean] = mean[!isna & sd.zero & a < mean & b > mean]^2 + # Focus in on where sd is nonzero and nothing is nan a = a[!sd.zero & !isna] b = b[!sd.zero & !isna] @@ -291,7 +290,8 @@ my_vtruncnorm = function(a, b, mean = 0, sd = 1) { scaled.res = (m2 - m1) * (m1 + m2) # Handle endpoints equal - scaled.res[(alpha == beta) & sd != 0] = alpha[(alpha == beta) & sd != 0] + isna = is.na(a) | is.na(b) | is.na(mean) | is.na(sd) + scaled.res[(alpha == beta) & sd != 0 & !isna] = alpha[(alpha == beta) & sd != 0 & !isna] #transform back to unscaled res = sd^2 * scaled.res From 7d70cf0232c8e358017d83d23103ccabc147bab0 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Fri, 25 Mar 2022 14:31:12 -0500 Subject: [PATCH 17/20] add more unit tests --- src/RcppExports.cpp | 5 ++++ tests/testthat/test_myetruncnorm.R | 48 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 612d70e..c04dc3a 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -5,6 +5,11 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // cxxMixSquarem List cxxMixSquarem(NumericMatrix matrix_lik, NumericVector prior, NumericVector pi_init, List control); RcppExport SEXP _ashr_cxxMixSquarem(SEXP matrix_likSEXP, SEXP priorSEXP, SEXP pi_initSEXP, SEXP controlSEXP) { diff --git a/tests/testthat/test_myetruncnorm.R b/tests/testthat/test_myetruncnorm.R index c5e2198..1568057 100644 --- a/tests/testthat/test_myetruncnorm.R +++ b/tests/testthat/test_myetruncnorm.R @@ -1,18 +1,19 @@ context("my_etruncnorm") test_that("my_etruncnorm returns expected results", { - expect_equal(-100,my_etruncnorm(-Inf,-100,0,1),tolerance=0.01) - expect_equal(-100,my_etruncnorm(-Inf,-100,0,0)) - expect_equal(30,my_etruncnorm(30,100,0,0)) - real = c(-100,-100,30) - a=c(-Inf,-Inf,30) - b=c(-100,-100,100) - m=c(0,0,0) - sd=c(1,0,0) + a = c(-Inf,-Inf, 30,NA, 1, 1, 1,1,100,100,-100,-Inf, 0,-1, 1, -2, -2) + b = c(-100,-100,100, 1,NA, 1, 1,1,Inf,Inf, -30, Inf, Inf, 1, 2, 2, 3) + m = c( 0, 0, 0, 1, 1,NA, 1,1, 0, 0, 0, 5, 0, 0, 0,-5e9, 0) + sd = c( 1, 0, 0, 1, 1, 1,NA,1, 1, 0, 0, 2, 1, 1, 1, 2,1e4) + real = c(-100,-100, 30,NA,NA,NA,NA,1,100,100, -30, 5,0.80, 0,1.38, -2,0.5) + N = length(real) + for (idx in 1:N){ + expect_equal(real[idx],my_etruncnorm(a[idx],b[idx],m[idx],sd[idx]),tolerance=0.01) + } expect_equal(real,my_etruncnorm(a,b,m,sd),tolerance=0.01) - real = matrix(real,3,4) - m = matrix(m,3,4) - sd = matrix(sd,3,4) + real = matrix(real,N,4) + m = matrix(m,N,4) + sd = matrix(sd,N,4) expect_equal(real,my_etruncnorm(a,b,m,sd),tolerance=0.01) a=c(0,0) b=c(1,2) @@ -25,23 +26,26 @@ test_that("my_etruncnorm returns expected results", { expect_equal(my_etruncnorm(0,9999,-2,3),my_etruncnorm(0,Inf,-2,3),tol=1e-3) expect_error(my_etruncnorm(0, 1:2, mean = 0, sd = 1)) expect_error(my_etruncnorm(1, 0, mean = 0, sd = 1)) + + #TODO add test cases from pull request }) context("my_vtruncnorm") test_that("my_vtruncnorm returns expected results", { - expect_equal(0, my_vtruncnorm(-Inf, -100), tolerance = 0.01) - expect_equal(0, my_vtruncnorm(-Inf, -100, sd = 0)) - expect_equal(0, my_vtruncnorm(30, 100, sd = 0)) - real = c(0, 0, 0) - a = c(-Inf, -Inf, 30) - b = c(-100, -100, 100) - m = c(0, 0, 0) - sd = c(1, 0, 0) + a = c(-Inf,-Inf, 30,NA, 1, 1, 1,1,100,100,-100,-Inf, 0, -1, 1, -2, -2) + b = c(-100,-100,100, 1,NA, 1, 1,1,Inf,Inf, -30, Inf, Inf, 1, 2, 2, 3) + m = c( 0, 0, 0, 1, 1,NA, 1,1, 0, 0, 0, 5, 0, 0, 0,-5e9, 0) + sd = c( 1, 0, 0, 1, 1, 1,NA,1, 1, 0, 0, 2, 2, 1, 1, 2, 1e4) + real = c( 0, 0, 0,NA,NA,NA,NA,0, 0, 0, 0, 4,1.45,0.29,0.07, 0,2.08) + N = length(real) + for (idx in 1:N){ + expect_equal(real[idx],my_vtruncnorm(a[idx],b[idx],m[idx],sd[idx]),tolerance=0.01) + } expect_equal(real, my_vtruncnorm(a, b, m, sd), tolerance = 0.01) - real = matrix(real, 3, 4) - m = matrix(m, 3, 4) - sd = matrix(sd, 3, 4) + real = matrix(real, N, 4) + m = matrix(m, N, 4) + sd = matrix(sd, N, 4) expect_equal(real, my_vtruncnorm(a, b, m, sd), tolerance = 0.01) a = c(0, 0) b = c(1, 2) From 6200072bf7f3b0c4f64d04af4206e77b3763ef36 Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Fri, 25 Mar 2022 15:11:35 -0500 Subject: [PATCH 18/20] small bug in masking to change how computations are done --- R/truncnorm.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 7717d72..2028623 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -249,10 +249,10 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { # μ^2 + σ^2 E(Z^2) + 2 μ σ E(Z) # possible catestropic cancellation because μ^2 + σ^2 E(Z^2) ≧ 0 while 2μσE(Z) has unknown sign # If |μ| < σ , compute as μ^2 + σ(σ E(Z^2) + 2 μ E(Z)) - mean_smaller = abs(mean) < sd - res[!sd.zero & !isna & mean_smaller] = mean^2 + sd*(sd * scaled.2mom + 2 * mean * scaled.mean) + m_sd = abs(mean) < sd + res[!sd.zero & !isna][m_sd] = mean[m_sd]^2 + sd[m_sd]*(sd[m_sd] * scaled.2mom[m_sd] + 2 * mean[m_sd] * scaled.mean[m_sd]) # If σ ≦ |μ| , compute as μ(μ + 2 σ E(Z)) + σ^2 E(Z^2) - res[!sd.zero & !isna & !mean_smaller] = mean*(mean + 2 * sd * scaled.mean) + sd^2 * scaled.2mom + res[!sd.zero & !isna][!m_sd] = mean[!m_sd]*(mean[!m_sd] + 2 * sd[!m_sd] * scaled.mean[!m_sd]) + sd[!m_sd]^2 * scaled.2mom[!m_sd] # Check that the results make sense -- should never be negative stopifnot(all(res >= 0 | is.na(res))) From 70b371cdedb7074072f67fc3fe8faa52d90e662f Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Mon, 28 Mar 2022 16:56:49 -0500 Subject: [PATCH 19/20] correcting small errors silently, raising errors for big errors --- R/truncnorm.R | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 2028623..0d5e57d 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -125,6 +125,14 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { #transform back to nonstandard normal case res[!sd.zero & !isna] = mean + sd * scaled.mean + #throw error if results are far outside the plausible range + error_tol = 1 + stopifnot(all(res > a-error_tol | is.na(res))) + stopifnot(all(res < b+error_tol | is.na(res))) + #silently correct small errors + res[res < a & !isna] = a + res[res > b & !isna] = b + return(res) } @@ -253,9 +261,15 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { res[!sd.zero & !isna][m_sd] = mean[m_sd]^2 + sd[m_sd]*(sd[m_sd] * scaled.2mom[m_sd] + 2 * mean[m_sd] * scaled.mean[m_sd]) # If σ ≦ |μ| , compute as μ(μ + 2 σ E(Z)) + σ^2 E(Z^2) res[!sd.zero & !isna][!m_sd] = mean[!m_sd]*(mean[!m_sd] + 2 * sd[!m_sd] * scaled.mean[!m_sd]) + sd[!m_sd]^2 * scaled.2mom[!m_sd] - - # Check that the results make sense -- should never be negative - stopifnot(all(res >= 0 | is.na(res))) + # TODO experiment with whether the above is a good idea or not... + + #throw error if results are far outside the plausible range + error_tol = 1 + stopifnot(all(res > a^2-error_tol | is.na(res))) + stopifnot(all(res < b^2+error_tol | is.na(res))) + #silently correct small errors + res[res < a^2 & !isna] = a^2 + res[res > b^2 & !isna] = b^2 return(res) } @@ -299,6 +313,12 @@ my_vtruncnorm = function(a, b, mean = 0, sd = 1) { # Handle zero sds. res[sd == 0] = 0 + #throw error if results are far outside the plausible range + error_tol = 1 + stopifnot(all(res > -error_tol | is.na(res))) + #silently correct small errors + res[res < 0 & !isna] = 0 + return(res) } From 8b8883420038987d9a13911e607960d69f655e4c Mon Sep 17 00:00:00 2001 From: Sue Parkinson Date: Mon, 28 Mar 2022 17:12:16 -0500 Subject: [PATCH 20/20] better error messages --- R/truncnorm.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/truncnorm.R b/R/truncnorm.R index 0d5e57d..ed03b6d 100644 --- a/R/truncnorm.R +++ b/R/truncnorm.R @@ -127,8 +127,9 @@ my_etruncnorm = function(a, b, mean = 0, sd = 1) { #throw error if results are far outside the plausible range error_tol = 1 - stopifnot(all(res > a-error_tol | is.na(res))) - stopifnot(all(res < b+error_tol | is.na(res))) + if (any(res < a-error_tol & !isna) | any(res > b+error_tol & !isna)) { + stop("Computed expected value of truncated normal is outside plausible range") + } #silently correct small errors res[res < a & !isna] = a res[res > b & !isna] = b @@ -265,8 +266,9 @@ my_e2truncnorm = function(a, b, mean = 0, sd = 1) { #throw error if results are far outside the plausible range error_tol = 1 - stopifnot(all(res > a^2-error_tol | is.na(res))) - stopifnot(all(res < b^2+error_tol | is.na(res))) + if (any(res < a^2-error_tol & !isna) | any(res > b^2+error_tol & !isna)) { + stop("Computed second moment of truncated normal is outside plausible range") + } #silently correct small errors res[res < a^2 & !isna] = a^2 res[res > b^2 & !isna] = b^2 @@ -315,7 +317,9 @@ my_vtruncnorm = function(a, b, mean = 0, sd = 1) { #throw error if results are far outside the plausible range error_tol = 1 - stopifnot(all(res > -error_tol | is.na(res))) + if (any(res < -error_tol & !isna)) { + stop("Computed variance of truncated normal is outside plausible range") + } #silently correct small errors res[res < 0 & !isna] = 0