Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Poisson log link PMF #115

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ branches:
# This is the minimal set of R packages needed to run "R CMD check" on
# the package.
install:
- R -e 'install.packages(c("assertthat","devtools","etrunct","truncnorm","Rcpp","foreach","doParallel","pscl","SQUAREM","testthat","rmarkdown","knitr","ggplot2"))'
- R -e 'install.packages(c("assertthat","devtools","etrunct","expint","truncnorm","Rcpp","foreach","doParallel","pscl","SQUAREM","testthat","rmarkdown","knitr","ggplot2"))'
- R -e 'devtools::install_github("stephenslab/mixsqp")'

env:
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ Imports:
pscl,
Rcpp (>= 0.10.5),
foreach,
etrunct
etrunct,
expint
LinkingTo: Rcpp
Suggests:
testthat,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ import(foreach)
import(parallel)
import(pscl)
import(truncnorm)
importFrom(expint,gammainc)
importFrom(graphics,abline)
importFrom(graphics,legend)
importFrom(graphics,lines)
Expand Down
29 changes: 20 additions & 9 deletions R/ash.R
Original file line number Diff line number Diff line change
Expand Up @@ -301,12 +301,16 @@ ash.workhorse <-

# set range to search the mode
if (lik$name=="pois"){
lam = lik$data$y / lik$data$scale
if (lik$data$link=="identity"){
args$modemin = min(mode, min(lik$data$y/lik$data$scale),na.rm = TRUE)
args$modemax = max(mode, max(lik$data$y/lik$data$scale),na.rm = TRUE)
}else if (lik$data$link=="log"){
args$modemin = min(log(lik$data$y/lik$data$scale+0.01))
args$modemax = max(log(lik$data$y/lik$data$scale+0.01))
args$modemin = min(mode, min(lam), na.rm = TRUE)
args$modemax = max(mode, max(lam), na.rm = TRUE)
}
else if (lik$data$link=="log"){
eps = 1 / mean(lik$data$scale)
log_lam = log(lam + eps)
args$modemin = min(log_lam)
args$modemax = max(log_lam)
}
}else if(lik$name=="binom"){
if (lik$data$link=="identity"){
Expand Down Expand Up @@ -753,16 +757,23 @@ qval.from.lfdr = function(lfdr){
return(qvalue)
}

# try to select a default range for the sigmaa values
# try to select a default range for the sigma values
# that should be used, based on the values of betahat and sebetahat
# mode is the location about which inference is going to be centered
# mult is the multiplier by which the sds differ across the grid
# grange is the user-specified range of mixsd
autoselect.mixsd = function(data,mult,mode,grange,mixcompdist){
if (data$lik$name %in% c("pois")){
data$x = data$lik$data$y/data$lik$data$scale #estimate of lambda
data$s = sqrt(data$x)/data$lik$data$scale #standard error of estimate
# if the link is log we probably want to take the log of this?
lam = data$lik$data$y / data$lik$data$scale
if (data$lik$data$link == "identity"){
data$x = lam
data$s = sqrt(data$x) / data$lik$data$scale
}
else {
eps = 1 / mean(data$lik$data$scale)
data$x = log(lam + eps)
data$s = sqrt(var(lam) / (lam + eps)^2)
}
}
if (data$lik$name %in% c("binom")){
data$x = data$lik$data$y
Expand Down
26 changes: 19 additions & 7 deletions R/lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ lik_logF = function(df1,df2){
#' @details Suppose we have Poisson observations \code{y} where \eqn{y_i\sim Poisson(c_i\lambda_i)}.
#' We either put an unimodal prior g on the (scaled) intensities \eqn{\lambda_i\sim g}
#' (by specifying \code{link="identity"}) or on the log intensities
#' \eqn{logit(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way,
#' \eqn{log(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way,
#' ASH with this Poisson likelihood function will compute the posterior mean of the
#' intensities \eqn{\lambda_i}.
#' @param y Poisson observations.
Expand All @@ -73,6 +73,7 @@ lik_logF = function(df1,df2){
#'
#' @importFrom stats pgamma
#' @importFrom stats dgamma
#' @importFrom expint gammainc
#'
#' @export
#'
Expand All @@ -81,19 +82,30 @@ lik_pois = function(y, scale=1, link=c("identity","log")){
if (link=="identity"){
list(name = "pois",
const = TRUE,
## log_comp_dens_conv.unimix calls this like
##
## lcdfFUN((data$x - a) / data$s)
##
## where data is an ash data object with data$x = 0 and data$s = 1, so
## we need to take absolute values
lcdfFUN = function(x){pgamma(abs(x),shape=y+1,rate=scale,log.p=TRUE)-log(scale)},
lpdfFUN = function(x){dgamma(abs(x),shape=y+1,rate=scale,log=TRUE)-log(scale)},
## comp_postmean.unimix calls this like
##
## x - s * do.call(lik$etruncFUN, list(alpha, beta))
##
## so we need to return the negative
etruncFUN = function(a,b){-my_etruncgamma(-b,-a,y+1,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(-b,-a,y+1,scale)},
data=list(y=y, scale=scale, link=link))
}else if (link=="log"){
y1 = y+1e-5 # add pseudocount
}
else if (link=="log"){
list(name = "pois",
const = TRUE,
lcdfFUN = function(x){pgamma(exp(-x),shape=y1,rate=scale,log.p=TRUE)-log(y1)},
lpdfFUN = function(x){dgamma(exp(-x),shape=y1,rate=scale,log=TRUE)-log(y1)},
etruncFUN = function(a,b){-my_etruncgamma(exp(-b),exp(-a),y1,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(exp(-b),exp(-a),y1,scale)},
lcdfFUN = function(x) {log(expint::gammainc(y, scale * exp(-x))) - lgamma(y + 1)},
lpdfFUN = function(x){dpois(y, exp(-x), log=TRUE)},
etruncFUN = function(a,b){-my_etruncgamma(exp(-b),exp(-a),y,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(exp(-b),exp(-a),y,scale)},
data=list(y=y, scale=scale, link=link))
}
}
Expand Down
5 changes: 5 additions & 0 deletions R/unimix.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ comp_dens_conv.unimix = function(m,data){
}

#' log density of convolution of each component of a unif mixture
#'
#' @inheritParams comp_dens_conv.unimix
#' @return a k by n matrix of densities
log_comp_dens_conv.unimix = function(m,data){
Expand All @@ -74,8 +75,12 @@ log_comp_dens_conv.unimix = function(m,data){
}

lcomp_dens = t(lpa + log(1-exp(lpb-lpa))) - log(b-a)
# Handle the case lpa = -Inf, lpb = -Inf, lpb - lpa = NaN. This can happen
# for Poisson likelihood with log link
lcomp_dens[lpa == -Inf & lpb == -Inf] = -Inf
lcomp_dens[a==b,] = t(do.call(lik$lpdfFUN, list(outer(data$x,b,FUN="-")/data$s))
-log(data$s))[a==b,]

return(lcomp_dens)
}

Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ environment:
# This is the minimal set of R packages needed to run "R CMD check" on
# the package.
build_script:
- R -e install.packages(c('assertthat','devtools','etrunct','truncnorm','Rcpp','foreach','doParallel','pscl','SQUAREM','testthat','rmarkdown','knitr','ggplot2'),head(.libPaths(),1),'http://cran.wustl.edu')
- R -e install.packages(c('assertthat','devtools','etrunct','expint','truncnorm','Rcpp','foreach','doParallel','pscl','SQUAREM','testthat','rmarkdown','knitr','ggplot2'),head(.libPaths(),1),'http://cran.wustl.edu')
- R -e devtools::install_github('stephenslab/mixsqp')

test_script:
Expand Down
2 changes: 1 addition & 1 deletion man/lik_pois.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

96 changes: 95 additions & 1 deletion tests/testthat/test_lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,100 @@ test_that("general likelihood with multiple df works", {

data =set_data(betahat,s,lik = lik_t(df),alpha=0)
expect_equal(calc_null_loglik(data),sum(dt(betahat/s,df=df,log=TRUE)-log(s)))

})

# The marginal PMF p(y) under a uniform prior on lambda should equal the
# Poisson PMF integrated against that prior (normalized by its width).
test_that("Poisson (identity link) marginal PMF is correct", {
  y <- seq(0, 100)
  g <- unimix(1, .1, .2)
  lik <- lik_pois(y = y, link = "identity")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  width <- g$b[1] - g$a[1]
  true_py <- vapply(y, function(k) {
    integrate(function(lam) dpois(k, lam), g$a[1], g$b[1])$value / width
  }, numeric(1))
  expect_equal(py, true_py)
})

# A point-mass prior (a == b) should reduce the marginal PMF to the plain
# Poisson PMF evaluated at that single intensity.
test_that("Poisson (identity link) marginal PMF for point mass is correct", {
  y <- seq(0, 100)
  g <- unimix(1, .1, .1)
  lik <- lik_pois(y = y, link = "identity")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  # dpois is vectorized over y, so no explicit loop is needed.
  true_py <- dpois(y, g$a[1])
  expect_equal(py, true_py)
})

# With a scale factor s, observations are y ~ Poisson(s * lambda); the
# marginal PMF should match numerical integration of dpois(y, s * lambda).
test_that("Poisson (identity link) marginal PMF is correct with scale factors", {
  y <- seq(0, 100)
  s <- 100
  g <- unimix(1, 1e-3, 2e-3)
  lik <- lik_pois(y = y, scale = s, link = "identity")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  width <- g$b[1] - g$a[1]
  true_py <- vapply(y, function(k) {
    integrate(function(lam) dpois(k, s * lam), g$a[1], g$b[1])$value / width
  }, numeric(1))
  expect_equal(py, true_py, tolerance = 1e-5, scale = 1)
})

# With the log link the prior is on log(lambda); the marginal PMF should
# match integrating dpois(y, exp(log_lam)) against the uniform prior.
test_that("Poisson (log link) marginal PMF is correct", {
  y <- seq(0, 100)
  g <- unimix(1, log(.1), log(.2))
  lik <- lik_pois(y = y, link = "log")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  width <- g$b[1] - g$a[1]
  true_py <- vapply(y, function(k) {
    integrate(function(u) dpois(k, exp(u)), g$a[1], g$b[1])$value / width
  }, numeric(1))
  expect_equal(py, true_py, tolerance = 1e-5, scale = 1)
})

# A point mass at log(lambda) should give exactly dpois(y, lambda).
test_that("Poisson (log link) marginal PMF for point mass is correct", {
  y <- seq(0, 100)
  g <- unimix(1, log(.1), log(.1))
  lik <- lik_pois(y = y, link = "log")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  # dpois is vectorized over y; exp(g$a[1]) recovers the intensity.
  true_py <- dpois(y, exp(g$a[1]))
  expect_equal(py, true_py)
})

# Log link plus a scale factor: y ~ Poisson(s * exp(log_lam)); compare
# against direct numerical integration over the log-intensity prior.
test_that("Poisson (log link) marginal PMF is correct with scale factors", {
  y <- seq(0, 100)
  s <- 100
  g <- unimix(1, log(1e-3), log(2e-3))
  lik <- lik_pois(y = y, scale = s, link = "log")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  width <- g$b[1] - g$a[1]
  true_py <- vapply(y, function(k) {
    integrate(function(u) dpois(k, s * exp(u)), g$a[1], g$b[1])$value / width
  }, numeric(1))
  expect_equal(py, true_py, tolerance = 1e-5, scale = 1)
})

# Check the log marginal probability of observing y = 0 across a wide range
# of scale factors s (1e3 .. 1e6). Large s pushes p(0 | s) toward 0, so this
# exercises the numerically delicate underflow regime of the log-link lcdfFUN.
test_that("ln p(x = 0 | s) is correct for Poisson likelihood (log link)", {
# Log-spaced grid of scale factors; one y = 0 observation per scale.
s = 10 ^ seq(log10(1e3), log10(1e6), .1)
y = rep(0, length(s))
g = unimix(1, log(1e-3), log(2e-3))
lik = lik_pois(y=y, scale=s, link="log")
data = set_data(rep(0, length(y)), 1, lik)
log_py = drop(log(comp_dens_conv(g, data)))
# Reference value: integrate dpois(0, s * exp(log_lam)) over the uniform
# prior on log(lambda), then take logs. Can underflow to -Inf for large s.
true_log_py = log(unlist(lapply(s, function (z) {integrate(function(log_lam) {dpois(0, z * exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])})))
## Work around a recent bug comparing Inf (called from expect_equal)
##
## https://github.com/r-lib/testthat/issues/789
## https://github.com/r-lib/testthat/commit/992ddd82fd7b6f1fdc5bb66c31db94277f3df126
# Accept either exact equality (covers the -Inf == -Inf case that trips the
# testthat bug above) or agreement within an absolute tolerance of 1e-2.
expect_true(all((true_log_py == log_py | abs(true_log_py - log_py) < 1e-2)))
})

# Smoke test on a real dataset: the fitted marginal PMF evaluated at the
# observed counts must be a valid probability (strictly below 1).
test_that("Poisson (identity link) returns sensible marginal PMF on real data", {
  # Fixture shipped with the test suite; assumed to hold counts `x` and
  # per-observation scale factors `scale`.
  dat <- readRDS("test_pois_data.Rds")
  fit <- ash_pois(dat$x, dat$scale, link = "identity", mixcompdist = "halfuniform")
  # Renamed from `F`: in R, `F` is a reassignable alias for FALSE, and
  # shadowing it is a well-known footgun.
  pmf <- comp_dens_conv(fit$fitted_g, fit$data)
  expect_true(all(pmf < 1))
})

# Smoke test on a real dataset with the log link: build a plausible grid of
# mixture sds on the log scale, fit, and check the marginal PMF is valid.
test_that("Poisson (log link) returns sensible marginal PMF on real data", {
  dat <- readRDS("test_pois_data.Rds")
  lam <- dat$x / dat$scale
  # Pseudocount so log() is finite when a count is zero.
  eps <- 1 / mean(dat$scale)
  log_lam <- log(lam + eps)
  # Delta-method standard error of log(lambda + eps).
  se_log_lam <- sqrt(var(lam) / (lam + eps)^2)
  # NOTE(review): log_lam^2 - se_log_lam^2 can be negative, making sqrt()
  # produce NaN and max() propagate it -- confirm intended on this fixture.
  mixsd <- seq(.1 * min(se_log_lam), max(2 * sqrt(log_lam^2 - se_log_lam^2)), by = .5 * log(2))
  fit <- ash_pois(dat$x, dat$scale, link = "log", mixcompdist = "halfuniform", mixsd = mixsd)
  # Renamed from `F`: `F` is a reassignable alias for FALSE in R.
  pmf <- comp_dens_conv(fit$fitted_g, fit$data)
  expect_true(all(pmf < 1))
})
Loading