Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Poisson log link PMF #115

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
15 changes: 12 additions & 3 deletions R/lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ lik_logF = function(df1,df2){
#' @details Suppose we have Poisson observations \code{y} where \eqn{y_i\sim Poisson(c_i\lambda_i)}.
#' We either put an unimodal prior g on the (scaled) intensities \eqn{\lambda_i\sim g}
#' (by specifying \code{link="identity"}) or on the log intensities
#' \eqn{logit(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way,
#' \eqn{log(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way,
#' ASH with this Poisson likelihood function will compute the posterior mean of the
#' intensities \eqn{\lambda_i}.
#' @param y Poisson observations.
Expand All @@ -81,17 +81,26 @@ lik_pois = function(y, scale=1, link=c("identity","log")){
if (link=="identity"){
list(name = "pois",
const = TRUE,
## Important: in comp_dens_conv, this will be called like
##
## lcdfFUN((data$x - a) / data$s)
##
## where data is an ash data object with data$x = 0 and data$s =
## 1. Therefore, we need to take absolute values
lcdfFUN = function(x){pgamma(abs(x),shape=y+1,rate=scale,log.p=TRUE)-log(scale)},
lpdfFUN = function(x){dgamma(abs(x),shape=y+1,rate=scale,log=TRUE)-log(scale)},
etruncFUN = function(a,b){-my_etruncgamma(-b,-a,y+1,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(-b,-a,y+1,scale)},
data=list(y=y, scale=scale, link=link))
}else if (link=="log"){
}
else if (link=="log"){
y1 = y+1e-5 # add pseudocount
list(name = "pois",
const = TRUE,
## comp_dens_conv calls this with negative of the expected arguments
lcdfFUN = function(x){pgamma(exp(-x),shape=y1,rate=scale,log.p=TRUE)-log(y1)},
lpdfFUN = function(x){dgamma(exp(-x),shape=y1,rate=scale,log=TRUE)-log(y1)},
## This is PMF marginalizing over a point mass on x
lpdfFUN = function(x){dpois(y, exp(-x), log=TRUE)},
etruncFUN = function(a,b){-my_etruncgamma(exp(-b),exp(-a),y1,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(exp(-b),exp(-a),y1,scale)},
aksarkar marked this conversation as resolved.
Show resolved Hide resolved
data=list(y=y, scale=scale, link=link))
Expand Down
82 changes: 81 additions & 1 deletion tests/testthat/test_lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,86 @@ test_that("general likelihood with multiple df works", {

data =set_data(betahat,s,lik = lik_t(df),alpha=0)
expect_equal(calc_null_loglik(data),sum(dt(betahat/s,df=df,log=TRUE)-log(s)))

})

test_that("Poisson (identity link) marginal PMF is correct", {
y = seq(0, 100)
g = unimix(1, .1, .2)
lik = lik_pois(y=y, link="identity")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function (z) {integrate(function(lam) {dpois(z, lam)}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
expect_equal(py, true_py)
})

test_that("Poisson (identity link) marginal PMF for point mass is correct", {
y = seq(0, 100)
g = unimix(1, .1, .1)
lik = lik_pois(y=y, link="identity")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function(x) {dpois(x, g$a[1])}))
expect_equal(py, true_py)
})

test_that("Poisson (identity link) marginal PMF is correct with scale factors", {
y = seq(0, 100)
s = 100
g = unimix(1, 1e-3, 2e-3)
lik = lik_pois(y=y, scale=s, link="identity")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function (z) {integrate(function(lam) {dpois(z, s * lam)}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
expect_equal(py, true_py, tolerance=1e-5, scale=1)
})

test_that("Poisson (log link) marginal PMF is correct", {
y = seq(0, 100)
g = unimix(1, log(.1), log(.2))
lik = lik_pois(y=y, link="log")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function (z) {integrate(function(log_lam) {dpois(z, exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
expect_equal(py, true_py, tolerance=1e-5, scale=1)
})

test_that("Poisson (log link) marginal PMF for point mass is correct", {
y = seq(0, 100)
g = unimix(1, log(.1), log(.1))
lik = lik_pois(y=y, link="log")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function(x) {dpois(x, exp(g$a[1]))}))
expect_equal(py, true_py)
})

test_that("Poisson (log link) marginal PMF is correct with scale factors", {
y = seq(0, 100)
s = 100
g = unimix(1, log(1e-3), log(2e-3))
lik = lik_pois(y=y, scale=s, link="log")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function (z) {integrate(function(log_lam) {dpois(z, s * exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
expect_equal(py, true_py, tolerance=1e-5, scale=1)
})

test_that("Poisson (identity link) returns sensible marginal PMF on real data", {
skip("save time")
dat = readRDS("test_pois_data.Rds")
fit <- ash_pois(dat$x, dat$scale, link="identity", mixcompdist="halfuniform")
F = comp_dens_conv(fit$fitted_g, fit$data)
expect_true(all(F < 1))
})

test_that("Poisson (log link) returns sensible marginal PMF on real data", {
dat = readRDS("test_pois_data.Rds")
lam <- dat$x / dat$scale
eps = 1 / mean(dat$scale)
log_lam <- log(lam + eps)
se_log_lam <- sqrt(var(lam) / (lam + eps)^2)
mixsd <- seq(.1 * min(se_log_lam), max(2 * sqrt(log_lam^2 - se_log_lam^2)), by=.5 * log(2))
fit <- ash_pois(dat$x, dat$scale, link="log", mixcompdist="halfuniform", mixsd=mixsd)
F = comp_dens_conv(fit$fitted_g, fit$data)
expect_true(all(F < 1))
})
14 changes: 7 additions & 7 deletions tests/testthat/test_pois.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ test_that("lik_pois (log link) fitted mode is close to true mode",{
})

test_that("Mode estimation for pois_lik finds an acceptable solution", {
set.seed(1)
# Load example 10X Genomics data
dat = readRDS("test_pois_data.Rds")
m0 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode="estimate")
lam = dat$x / dat$scale
m1 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode=c(min(lam), max(lam)))
expect_equal(m0$loglik, m1$loglik, tolerance=1, scale=1)
set.seed(1)
## Load example 10X Genomics data
dat = readRDS("test_pois_data.Rds")
m0 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode="estimate")
lam = dat$x / dat$scale
m1 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode=c(min(lam), max(lam)))
expect_equal(m0$loglik, m1$loglik, tolerance=1, scale=1)
})