itu-square · wasowski · Feb 28, 2023 · Mar 7, 2023 · Mar 11, 2023 · Mar 11, 2023
diff --git a/src/main/scala/symsim/concrete/Stats.scala b/src/main/scala/symsim/concrete/Stats.scala
@@ -40,15 +40,82 @@ def GaussianPosterior (prior_μ: Double, prior_σ2: Double, likelihood_σ2: Doub
   (post_μ, post_σ2)
 
 
+/** Numerically finds a minimum of the function f, using x0 as a seed
+ *  (starting point).
+ *
+ *  For the time being the gradient is approximated numerically,
+ *  although for our current use, we could probably find an analytical
+ *  solution. This would likely make the tests faster and less flaky.
+ */
+def minimize (f: Double => Double, x0: Double): Double =
+  import breeze.optimize.*
+  import breeze.linalg.DenseVector
+
+  // I am wrapping it into vector of size 1, because I do not
+  // understand how to force breeze to optimize a function directly on
+  // a Double value.  This likely needs to be fixed one day
+  val differrentiableF = 
+    ApproximateGradientFunction { (x: DenseVector[Double]) => f(x(0)) }
+
+  val lbfgs = new LBFGS[DenseVector[Double]] (maxIter = 500, m = 4) 
+  val xMin = lbfgs.minimize(differrentiableF, DenseVector(x0))
+  xMin (0)
 
-/** Probability mass between two points in a Gaussian distribution 
+/** Compute an highest density interval of mass p for density Beta(α, β).
+ *
+ *  This is approximate (because of a numerical optimization step)
+ *  Apparently, the procedure is quite general and we could use it for
+ *  other distributions too, but for the time being we are trying it
+ *  for Beta.
+ *  
+ *  We only handle nondegenerate cases, with α and β bigger than one.
+ *
+ *  Source: whuber (https://stats.stackexchange.com/users/919/whuber),
+ *  Credible set for beta distribution, URL (version: 2015-07-05):
+ *  https://stats.stackexchange.com/q/160035
+ *
+ *  Also: Andrey Akinshin. Trimmed Harrell-Davis quantile estimator
+ *  based on the highest density interval of the given width.  (but we
+ *  do given probability mass not given width).
  *
- *  TODO: check if this is not provided by breeze already; if so
- *  remove
+ *
+ *  @param α the number of successes observed (+1), σ > 1
+ *  @param β the number of failures observed (+1), β > 1
+ *  @param p the probability mass of the sought credibility interval
+ *
+ *  @return the endpoints of the HDI of mass `p` for density β(α, β).
  */
-def GaussianMass (μ: Double, σ2: Double) (left: Double, right: Double)
-  : Probability =
-  val mass_right  = Gaussian (μ, σ2).cdf (right)
-  val mass_left = Gaussian (μ, σ2).cdf (left) 
-  mass_right - mass_left
+def betaHdi (α: Double, β: Double, p: Double): (Double, Double) = 
+
+  require (α > 1.0)
+  require (β > 1.0)
+  require (p >= 0.0)
+  require (p <= 1.0)
+
+  val pc = 1.0 - p
+  val distr = Beta (α, β)
+  val mode = distr.mode
+  val l0 = distr.inverseCdf (pc / 2.0)
+
+  /** The objective function for the search.  We need to find a zero
+   *  in 2D space of this function.
+   */
+  def f (l: Double): Double = 
+    val pl = if l <= 0 then 0.0
+             else if l < 1.0 then distr.cdf (l)
+             else 1.0
+    val r = distr.inverseCdf ((pl + p) min 1.0)
+    val pr = if r <= 0 then 0.0
+             else if r < 1.0 then distr.cdf (r)
+             else 1.0
+    val diff = pl - pr 
+    val penalty = pr - pl - p // penalty if the interval is too small
+    diff * diff + penalty * penalty
 
+  assert (0 <= l0 && l0 <= 1.0, s"the initial value $l0 illegal for Beta")
+  val l = minimize (f, l0)
+  assert (0 <= l && l <= 1.0, s"the optimized left point $l illegal for Beta")
+  val r = distr.inverseCdf (distr.cdf (l) + p)
+  assert (0 <= r && r <= 1.0, s"the optimized right point $r illegal for Beta")
+  assert (l < r, s"the right HDI point $r is smaller than the left point $l")
+  (l, r).ensuring (0 <= l && l <= r && r <= 1.0, s"HDI: $l, $r")
diff --git a/src/main/scala/symsim/laws/ConcreteExpectedSarsaLaws.scala b/src/main/scala/symsim/laws/ConcreteExpectedSarsaLaws.scala
@@ -60,31 +60,39 @@ case class ConcreteExpectedSarsaLaws[State, ObservableState, Action]
 
     // TODO: this test is now repeated from ConcreteSarsaLaws. There
     // is some taxonomy error in the laws hierarchy. Needs refactoring
-    "probability of not choosing the best action is smaller than ε" ->
+    "probability of not choosing the best action is ε(1-|Action|^-1)" ->
       forAllNoShrink { (q: Q, a_t: Action) =>
 
-        val n = 4000
-        val ε = 0.1 // Ignore ε in the problem as it might be zero for
-                    // the sake of the other test
+        val n = 3000
+        val ε = 0.5 // Ignore ε in the problem as it might be zero for
+                    // the sake of the other test. 
+                    // Also a central ε makes the Bernouli test much stronger
+                    // (testing extremely small or large ε's requires extremely
+                    // many trials to achieve the desired confidence)
+        val εd = ε*(1 - 1.0 / agent.instances.numberOfActions)
 
         val trials = for 
           s_t  <- agent.initialize
           a_tt <- vf.chooseAction (ε) (q) (agent.observe (s_t))
         yield a_tt != vf.bestAction (q) (agent.observe (s_t))
 
-        // We implement this as a bayesian test, checking whether htere is
-        // 0.95 belief that the probability of suboptimal action is ≤ ε.
-        // We check this by computing the posterior and asking CDF (ε) ≥ 0.94
+        // We implement this as a Bayesian test, checking whether a small ROPE
+        // εd ± 0.005 attracts more than 0.95 belief.
 
         val successes = trials.take (n).count { _ == true }
         val failures = n - successes
 
         // α=1 and β=1 gives a prior, weak flat, unbiased
-        val cdfEpsilon =  Beta (2 + successes, 2 + failures).cdf (ε)
-
-        (cdfEpsilon >= 0.94) :| 
-          s"""|The beta posterior test results (failing):
-              |    posterior_cdf(${ε}) == $cdfEpsilon
+        val posterior =  Beta (1 + successes, 1 + failures)
+        val ropeHalf = 0.05
+        val (ropeL, ropeR) = (εd - ropeHalf, εd + ropeHalf)
+        val ropePMF = posterior.cdf (ropeR) - posterior.cdf (ropeL)
+
+        (ropePMF >= 0.94) :| 
+          f"""|The beta posterior test results (failing):
+              |    posterior(${εd}) == ${posterior.cdf(εd)}
+              |    ROPE of ${2*ropeHalf} around εd == (${ropeL};${ropeR})
+              |    PMF of ROPE == ${ropePMF}
               |    #exploration selections ≠ best action: $successes 
               |    #best action selections: $failures 
               |    #total trials: $n""".stripMargin
@@ -153,8 +161,8 @@ case class ConcreteExpectedSarsaLaws[State, ObservableState, Action]
 
          // Is the expected difference of the updates close to zero?
          val tolerance = 0.025
-         val ci_mass = symsim.concrete.GaussianMass (μ_post_diff, σ2_post_diff) 
-           (-tolerance, +tolerance)
+         val ci_mass = Gaussian (μ_post_diff, σ2_post_diff)
+           .probability (-tolerance, +tolerance)
 
          val msg = s"""|The normal posterior test results (failing):
                        |    ci mass == ${ci_mass}

diff --git a/src/main/scala/symsim/laws/ConcreteSarsaLaws.scala b/src/main/scala/symsim/laws/ConcreteSarsaLaws.scala
@@ -1,7 +1,7 @@
 package symsim
 package laws
 
-import breeze.stats.distributions.{Rand, Beta}
+import breeze.stats.distributions.{Rand, Beta, Gaussian, Binomial}
 import breeze.stats.distributions.Rand.VariableSeed.*
 import scala.language.postfixOps
 
@@ -50,37 +50,89 @@ case class ConcreteSarsaLaws[State, ObservableState, Action]
   given Arbitrary[Q] = 
     Arbitrary (vf.genVF (using agent.instances.arbitraryReward))
 
+  val δ = 0.01 // addmitted tail for the test of #successes / explorations
+
   val laws: RuleSet = SimpleRuleSet (
     "concreteSarsa",
 
-    "probability of not choosing the best action is smaller than ε" ->
-      forAllNoShrink { (q: Q, a_t: Action) =>
+    f"#explorations is not in the ${2*δ}-tail of binomial likelihood" ->
+      forAllNoShrink { (q: Q, a_t: Action, n0: Int) =>
+
+        val n = 100 + (n0 % 3000).abs
+        assert (n >= 100 && n < 3100)
 
-        val n = 4000
-        val ε = 0.1 // Ignore ε in the problem as it might be zero for
-                    // the sake of the other test
+        val ε = 0.5 // Ignore ε in the problem as it might be zero for
+                    // the sake of the other test. 
+
+        // The probability of choosing an exploring (suboptimal) action 
+        val εd = ε * (1 - 1.0 / agent.instances.numberOfActions)
 
         val trials = for 
           s_t  <- agent.initialize
           a_tt <- vf.chooseAction (ε) (q) (agent.observe (s_t))
         yield a_tt != vf.bestAction (q) (agent.observe (s_t))
 
-        // We implement this as a Bayesian test, checking whether htere is
-        // 0.95 belief that the probability of suboptimal action is ≤ ε.
-        // We check this by computing the posterior and asking CDF (ε) ≥ 0.94
+        val successes = trials.take (n).count { _ == true }
+
+        // calculate a "cdf" for number of successes 
+        val distribution = Binomial (n, εd)
+        val fakeCdf = LazyList.tabulate (n+1) { distribution.probabilityOf }
+          .scanLeft[(Int, Double)] (-1, 0.0) { case ((k, z), p) =>  (k+1, p + z) } 
+          .tail // drop the head (which is the initial seed)
+          .toMap
+
+        val pmfTrialsBelow = fakeCdf (successes) 
+        { δ <= pmfTrialsBelow && pmfTrialsBelow <= 1-δ } :| 
+          f"""|P(k <= $successes) <= $pmfTrialsBelow
+              |    $successes successes out of $n trials
+              |    probability of success ${εd}
+              |    P(k <= ${n}) = ${fakeCdf (n)}""".stripMargin
+
+    },
+
+    "probability of not choosing the best action is ε(1-|Action|^-1)" ->
+      forAllNoShrink { (q: Q, a_t: Action) =>
+
+        val n = 3000
+        val ε = 0.5 // Ignore ε in the problem as it might be zero for
+                    // the sake of the other test. 
+                    // Also a central ε makes the Bernouli test much stronger
+                    // (testing extremely small or large ε's requires extremely
+                    // many trials to achieve the desired confidence)
+
+        // the probability of choosing an exploring (suboptimal) action should be
+        val εd = ε*(1 - 1.0 / agent.instances.numberOfActions)
+
+        val trials = for 
+          s_t  <- agent.initialize
+          a_tt <- vf.chooseAction (ε) (q) (agent.observe (s_t))
+        yield a_tt != vf.bestAction (q) (agent.observe (s_t))
+
+        // We implement this as a Bayesian test, checking whether a small ROPE
+        // εd ± 0.05 attracts more than 0.95 belief.
+        // The HDI calculation seems not to be needed. Since the entire HDI
+        // has to be inside the ROPE we can just measure the ROPE's mass.
+        // It should be larger than the required confidence mass (p)
 
         val successes = trials.take (n).count { _ == true }
         val failures = n - successes
 
         // α=1 and β=1 gives a prior, weak flat, unbiased
-        val cdfEpsilon =  Beta (2 + successes, 2 + failures).cdf (ε)
-
-        (cdfEpsilon >= 0.94) :| 
-          s"""|The beta posterior test results (failing):
-              |    posterior_cdf(${ε}) == $cdfEpsilon
-              |    #exploration selections ≠ best action: $successes 
-              |    #best action selections: $failures 
-              |    #total trials: $n""".stripMargin
+        val p = 0.95
+        val posterior = Beta (1 + successes, 1 + failures)
+        val (ciL, ciR) =  (εd - 0.05, εd + 0.05) 
+
+        (posterior.probability (ciL, ciR) >= p) :|
+          f"""|The beta posterior test results (failing):
+              |    posterior(${εd}) == ${posterior.pdf(εd)}
+              |    posterior mode: ${posterior.mode}
+              |    ROPE [$ciL;$ciR] has mass ${posterior.probability (ciL, ciR)}
+              |    total trials: $n
+              |    exploration selections (success): $successes 
+              |    best action selections (failure): $failures""".stripMargin
+
+       // 2. I am still seeing negative left point of the Beta HDI, so there is
+       //    a bug in that function? TODO
     },
 
     // TODO: this test is alsmost the same as for ConcreteExpectedSarsaLaws.
@@ -139,8 +191,8 @@ case class ConcreteSarsaLaws[State, ObservableState, Action]
 
          // Is the expected difference of the updates close to zero?
          val tolerance = 0.025
-         val ci_mass = symsim.concrete.GaussianMass (μ_post_diff, σ2_post_diff) 
-           (-tolerance, +tolerance)
+         val ci_mass = Gaussian (μ_post_diff, σ2_post_diff)
+           .probability (-tolerance, +tolerance)
 
          val msg = s"""|The normal posterior test results (failing):
                        |    ci mass == ${ci_mass}

diff --git a/src/test/scala/symsim/concrete/StatsSpec.scala b/src/test/scala/symsim/concrete/StatsSpec.scala
@@ -0,0 +1,14 @@
+package symsim
+package concrete
+
+/** Sanity tests for Stats */
+class StatsSpec 
+  extends org.scalacheck.Properties ("concrete.Stats"):
+
+  // The oracle values here are empirically established (I am soo
+  // lazy). They roughly agree with the plot at 
+  // https://stats.stackexchange.com/questions/160020/credible-set-for-beta-distribution/160035#160035
+  property ("A specific test case for betaHdi") = 
+    val (l, r) = betaHdi (11, 4, 0.95)
+    (l - .5).abs <= 0.01 && (r - 0.91).abs <= 0.01
+