Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Poisson log link PMF #115

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ branches:
# This is the minimal set of R packages needed to run "R CMD check" on
# the package.
install:
- R -e 'install.packages(c("assertthat","devtools","etrunct","truncnorm","Rcpp","foreach","doParallel","pscl","SQUAREM","testthat","rmarkdown","knitr","ggplot2"))'
- R -e 'install.packages(c("assertthat","devtools","etrunct","expint","truncnorm","Rcpp","foreach","doParallel","pscl","SQUAREM","testthat","rmarkdown","knitr","ggplot2"))'
- R -e 'devtools::install_github("stephenslab/mixsqp")'

env:
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ Imports:
pscl,
Rcpp (>= 0.10.5),
foreach,
etrunct
etrunct,
expint
LinkingTo: Rcpp
Suggests:
testthat,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ import(foreach)
import(parallel)
import(pscl)
import(truncnorm)
importFrom(expint,gammainc)
importFrom(graphics,abline)
importFrom(graphics,legend)
importFrom(graphics,lines)
Expand Down
29 changes: 20 additions & 9 deletions R/ash.R
Original file line number Diff line number Diff line change
Expand Up @@ -301,12 +301,16 @@ ash.workhorse <-

# set range to search the mode
if (lik$name=="pois"){
lam = lik$data$y / lik$data$scale
if (lik$data$link=="identity"){
args$modemin = min(mode, min(lik$data$y/lik$data$scale),na.rm = TRUE)
args$modemax = max(mode, max(lik$data$y/lik$data$scale),na.rm = TRUE)
}else if (lik$data$link=="log"){
args$modemin = min(log(lik$data$y/lik$data$scale+0.01))
args$modemax = max(log(lik$data$y/lik$data$scale+0.01))
args$modemin = min(mode, min(lam), na.rm = TRUE)
args$modemax = max(mode, max(lam), na.rm = TRUE)
}
else if (lik$data$link=="log"){
eps = 1 / mean(lik$data$scale)
log_lam = log(lam + eps)
args$modemin = min(log_lam)
args$modemax = max(log_lam)
}
}else if(lik$name=="binom"){
if (lik$data$link=="identity"){
Expand Down Expand Up @@ -753,16 +757,23 @@ qval.from.lfdr = function(lfdr){
return(qvalue)
}

# try to select a default range for the sigmaa values
# try to select a default range for the sigma values
# that should be used, based on the values of betahat and sebetahat
# mode is the location about which inference is going to be centered
# mult is the multiplier by which the sds differ across the grid
# grange is the user-specified range of mixsd
autoselect.mixsd = function(data,mult,mode,grange,mixcompdist){
if (data$lik$name %in% c("pois")){
data$x = data$lik$data$y/data$lik$data$scale #estimate of lambda
data$s = sqrt(data$x)/data$lik$data$scale #standard error of estimate
# if the link is log we probably want to take the log of this?
lam = data$lik$data$y / data$lik$data$scale
if (data$lik$data$link == "identity"){
data$x = lam
data$s = sqrt(data$x) / data$lik$data$scale
}
else {
eps = 1 / mean(data$lik$data$scale)
data$x = log(lam + eps)
data$s = sqrt(var(lam) / (lam + eps)^2)
}
}
if (data$lik$name %in% c("binom")){
data$x = data$lik$data$y
Expand Down
26 changes: 19 additions & 7 deletions R/lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ lik_logF = function(df1,df2){
#' @details Suppose we have Poisson observations \code{y} where \eqn{y_i\sim Poisson(c_i\lambda_i)}.
#' We either put an unimodal prior g on the (scaled) intensities \eqn{\lambda_i\sim g}
#' (by specifying \code{link="identity"}) or on the log intensities
#' \eqn{logit(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way,
#' \eqn{log(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way,
#' ASH with this Poisson likelihood function will compute the posterior mean of the
#' intensities \eqn{\lambda_i}.
#' @param y Poisson observations.
Expand All @@ -73,6 +73,7 @@ lik_logF = function(df1,df2){
#'
#' @importFrom stats pgamma
#' @importFrom stats dgamma
#' @importFrom expint gammainc
#'
#' @export
#'
Expand All @@ -81,19 +82,30 @@ lik_pois = function(y, scale=1, link=c("identity","log")){
if (link=="identity"){
list(name = "pois",
const = TRUE,
## log_comp_dens_conv.unimix calls this like
##
## lcdfFUN((data$x - a) / data$s)
##
## where data is an ash data object with data$x = 0 and data$s = 1, so
## we need to take absolute values
lcdfFUN = function(x){pgamma(abs(x),shape=y+1,rate=scale,log.p=TRUE)-log(scale)},
lpdfFUN = function(x){dgamma(abs(x),shape=y+1,rate=scale,log=TRUE)-log(scale)},
## comp_postmean.unimix calls this like
##
## x - s * do.call(lik$etruncFUN, list(alpha, beta))
##
## so we need to return the negative
etruncFUN = function(a,b){-my_etruncgamma(-b,-a,y+1,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(-b,-a,y+1,scale)},
data=list(y=y, scale=scale, link=link))
}else if (link=="log"){
y1 = y+1e-5 # add pseudocount
}
else if (link=="log"){
list(name = "pois",
const = TRUE,
lcdfFUN = function(x){pgamma(exp(-x),shape=y1,rate=scale,log.p=TRUE)-log(y1)},
lpdfFUN = function(x){dgamma(exp(-x),shape=y1,rate=scale,log=TRUE)-log(y1)},
etruncFUN = function(a,b){-my_etruncgamma(exp(-b),exp(-a),y1,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(exp(-b),exp(-a),y1,scale)},
lcdfFUN = function(x) {log(expint::gammainc(y, scale * exp(-x))) - lgamma(y + 1)},
lpdfFUN = function(x){dpois(y, exp(-x), log=TRUE)},
etruncFUN = function(a,b){-my_etruncgamma(exp(-b),exp(-a),y,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(exp(-b),exp(-a),y,scale)},
data=list(y=y, scale=scale, link=link))
}
}
Expand Down
5 changes: 5 additions & 0 deletions R/unimix.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ comp_dens_conv.unimix = function(m,data){
}

#' log density of convolution of each component of a unif mixture
#'
#' @inheritParams comp_dens_conv.unimix
#' @return a k by n matrix of densities
log_comp_dens_conv.unimix = function(m,data){
Expand All @@ -74,8 +75,12 @@ log_comp_dens_conv.unimix = function(m,data){
}

lcomp_dens = t(lpa + log(1-exp(lpb-lpa))) - log(b-a)
# Handle the case lpa = -Inf, lpb = -Inf, lpb - lpa = NaN. This can happen
# for Poisson likelihood with log link
lcomp_dens[lpa == -Inf & lpb == -Inf] = -Inf
lcomp_dens[a==b,] = t(do.call(lik$lpdfFUN, list(outer(data$x,b,FUN="-")/data$s))
-log(data$s))[a==b,]

return(lcomp_dens)
}

Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ environment:
# This is the minimal set of R packages needed to run "R CMD check" on
# the package.
build_script:
- R -e install.packages(c('assertthat','devtools','etrunct','truncnorm','Rcpp','foreach','doParallel','pscl','SQUAREM','testthat','rmarkdown','knitr','ggplot2'),head(.libPaths(),1),'http://cran.wustl.edu')
- R -e install.packages(c('assertthat','devtools','etrunct','expint','truncnorm','Rcpp','foreach','doParallel','pscl','SQUAREM','testthat','rmarkdown','knitr','ggplot2'),head(.libPaths(),1),'http://cran.wustl.edu')
- R -e devtools::install_github('stephenslab/mixsqp')

test_script:
Expand Down
2 changes: 1 addition & 1 deletion man/lik_pois.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

96 changes: 95 additions & 1 deletion tests/testthat/test_lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,100 @@ test_that("general likelihood with multiple df works", {

data =set_data(betahat,s,lik = lik_t(df),alpha=0)
expect_equal(calc_null_loglik(data),sum(dt(betahat/s,df=df,log=TRUE)-log(s)))

})

# The marginal PMF p(y) under a uniform prior on lambda should equal the
# Poisson PMF integrated against that prior (normalized by its width).
test_that("Poisson (identity link) marginal PMF is correct", {
  y <- seq(0, 100)
  g <- unimix(1, .1, .2)
  lik <- lik_pois(y = y, link = "identity")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  width <- g$b[1] - g$a[1]
  true_py <- vapply(y, function(k) {
    integrate(function(lam) dpois(k, lam), g$a[1], g$b[1])$value / width
  }, numeric(1))
  expect_equal(py, true_py)
})

# A point-mass prior (a == b) should reduce the marginal PMF to the plain
# Poisson PMF evaluated at that single intensity.
test_that("Poisson (identity link) marginal PMF for point mass is correct", {
  y <- seq(0, 100)
  g <- unimix(1, .1, .1)
  lik <- lik_pois(y = y, link = "identity")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  # dpois is vectorized over y, so no explicit loop is needed.
  true_py <- dpois(y, g$a[1])
  expect_equal(py, true_py)
})

# With a scale factor s, observations are y ~ Poisson(s * lambda); the
# marginal PMF should match numerical integration of dpois(y, s * lambda).
test_that("Poisson (identity link) marginal PMF is correct with scale factors", {
  y <- seq(0, 100)
  s <- 100
  g <- unimix(1, 1e-3, 2e-3)
  lik <- lik_pois(y = y, scale = s, link = "identity")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  width <- g$b[1] - g$a[1]
  true_py <- vapply(y, function(k) {
    integrate(function(lam) dpois(k, s * lam), g$a[1], g$b[1])$value / width
  }, numeric(1))
  expect_equal(py, true_py, tolerance = 1e-5, scale = 1)
})

# With the log link the prior is on log(lambda); the marginal PMF should
# match integrating dpois(y, exp(log_lam)) against the uniform prior.
test_that("Poisson (log link) marginal PMF is correct", {
  y <- seq(0, 100)
  g <- unimix(1, log(.1), log(.2))
  lik <- lik_pois(y = y, link = "log")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  width <- g$b[1] - g$a[1]
  true_py <- vapply(y, function(k) {
    integrate(function(u) dpois(k, exp(u)), g$a[1], g$b[1])$value / width
  }, numeric(1))
  expect_equal(py, true_py, tolerance = 1e-5, scale = 1)
})

# A point mass at log(lambda) should give exactly dpois(y, lambda).
test_that("Poisson (log link) marginal PMF for point mass is correct", {
  y <- seq(0, 100)
  g <- unimix(1, log(.1), log(.1))
  lik <- lik_pois(y = y, link = "log")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  # dpois is vectorized over y; exp(g$a[1]) recovers the intensity.
  true_py <- dpois(y, exp(g$a[1]))
  expect_equal(py, true_py)
})

# Log link plus a scale factor: y ~ Poisson(s * exp(log_lam)); compare
# against direct numerical integration over the log-intensity prior.
test_that("Poisson (log link) marginal PMF is correct with scale factors", {
  y <- seq(0, 100)
  s <- 100
  g <- unimix(1, log(1e-3), log(2e-3))
  lik <- lik_pois(y = y, scale = s, link = "log")
  data <- set_data(rep(0, length(y)), 1, lik)
  py <- drop(comp_dens_conv(g, data))
  width <- g$b[1] - g$a[1]
  true_py <- vapply(y, function(k) {
    integrate(function(u) dpois(k, s * exp(u)), g$a[1], g$b[1])$value / width
  }, numeric(1))
  expect_equal(py, true_py, tolerance = 1e-5, scale = 1)
})

# Check the log marginal probability of observing y = 0 across a wide range
# of scale factors s (1e3 .. 1e6). Large s pushes p(0 | s) toward 0, so this
# exercises the numerically delicate underflow regime of the log-link lcdfFUN.
test_that("ln p(x = 0 | s) is correct for Poisson likelihood (log link)", {
# Log-spaced grid of scale factors; one y = 0 observation per scale.
s = 10 ^ seq(log10(1e3), log10(1e6), .1)
y = rep(0, length(s))
g = unimix(1, log(1e-3), log(2e-3))
lik = lik_pois(y=y, scale=s, link="log")
data = set_data(rep(0, length(y)), 1, lik)
log_py = drop(log(comp_dens_conv(g, data)))
# Reference value: integrate dpois(0, s * exp(log_lam)) over the uniform
# prior on log(lambda), then take logs. Can underflow to -Inf for large s.
true_log_py = log(unlist(lapply(s, function (z) {integrate(function(log_lam) {dpois(0, z * exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])})))
## Work around a recent bug comparing Inf (called from expect_equal)
##
## https://github.com/r-lib/testthat/issues/789
## https://github.com/r-lib/testthat/commit/992ddd82fd7b6f1fdc5bb66c31db94277f3df126
# Accept either exact equality (covers the -Inf == -Inf case that trips the
# testthat bug above) or agreement within an absolute tolerance of 1e-2.
expect_true(all((true_log_py == log_py | abs(true_log_py - log_py) < 1e-2)))
})

# Smoke test on a real dataset: the fitted marginal PMF evaluated at the
# observed counts must be a valid probability (strictly below 1).
test_that("Poisson (identity link) returns sensible marginal PMF on real data", {
  # Fixture shipped with the test suite; assumed to hold counts `x` and
  # per-observation scale factors `scale`.
  dat <- readRDS("test_pois_data.Rds")
  fit <- ash_pois(dat$x, dat$scale, link = "identity", mixcompdist = "halfuniform")
  # Renamed from `F`: in R, `F` is a reassignable alias for FALSE, and
  # shadowing it is a well-known footgun.
  pmf <- comp_dens_conv(fit$fitted_g, fit$data)
  expect_true(all(pmf < 1))
})

# Smoke test on a real dataset with the log link: build a plausible grid of
# mixture sds on the log scale, fit, and check the marginal PMF is valid.
test_that("Poisson (log link) returns sensible marginal PMF on real data", {
  dat <- readRDS("test_pois_data.Rds")
  lam <- dat$x / dat$scale
  # Pseudocount so log() is finite when a count is zero.
  eps <- 1 / mean(dat$scale)
  log_lam <- log(lam + eps)
  # Delta-method standard error of log(lambda + eps).
  se_log_lam <- sqrt(var(lam) / (lam + eps)^2)
  # NOTE(review): log_lam^2 - se_log_lam^2 can be negative, making sqrt()
  # produce NaN and max() propagate it -- confirm intended on this fixture.
  mixsd <- seq(.1 * min(se_log_lam), max(2 * sqrt(log_lam^2 - se_log_lam^2)), by = .5 * log(2))
  fit <- ash_pois(dat$x, dat$scale, link = "log", mixcompdist = "halfuniform", mixsd = mixsd)
  # Renamed from `F`: `F` is a reassignable alias for FALSE in R.
  pmf <- comp_dens_conv(fit$fitted_g, fit$data)
  expect_true(all(pmf < 1))
})
Loading