From 590b0672698251074961b79333d8f79859d96dc6 Mon Sep 17 00:00:00 2001
From: Lukas Heinrich <lukas.heinrich@gmail.com>
Date: Fri, 30 Oct 2020 17:04:06 +0100
Subject: [PATCH 01/30] unify inits

---
 src/pyhf/infer/calculators.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 37d3c8ec64..aa98658305 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -551,7 +551,7 @@ def distributions(self, poi_test, track_progress=None):
                     poi_test,
                     sample,
                     self.pdf,
-                    signal_pars,
+                    self.init_pars,
                     self.par_bounds,
                     self.fixed_params,
                 )
@@ -564,7 +564,7 @@ def distributions(self, poi_test, track_progress=None):
                     poi_test,
                     sample,
                     self.pdf,
-                    bkg_pars,
+                    self.init_pars,
                     self.par_bounds,
                     self.fixed_params,
                 )

From a8b578b76d0981c5c858f166453ffa488bdb0baa Mon Sep 17 00:00:00 2001
From: Lukas Heinrich <lukas.heinrich@gmail.com>
Date: Wed, 4 Nov 2020 11:51:18 +0100
Subject: [PATCH 02/30] add ROOT cross-check

---
 validation/StandardHypoTestDemo.py  |  55 +++++++++++
 validation/xmlimport_input_bkg.json | 146 ++++++++++++++++++++++++++++
 2 files changed, 201 insertions(+)
 create mode 100644 validation/StandardHypoTestDemo.py
 create mode 100644 validation/xmlimport_input_bkg.json

diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py
new file mode 100644
index 0000000000..e5551cde6c
--- /dev/null
+++ b/validation/StandardHypoTestDemo.py
@@ -0,0 +1,55 @@
+import ROOT
+
+def StandardHypoTestDemo(
+    infile,
+    ntoys,
+    workspaceName = "combined",
+    modelSBName = "ModelConfig",
+    dataName = "obsData",
+):
+
+    file = ROOT.TFile.Open(infile);
+    w = file.Get(workspaceName);
+    sbModel = w.obj(modelSBName);
+    data = w.data(dataName);
+
+    bModel = sbModel.Clone();
+    bModel.SetName(modelSBName+"B_only");
+    var = (bModel.GetParametersOfInterest().first());
+    oldval = var.getVal();
+    var.setVal(0);
+    bModel.SetSnapshot( ROOT.RooArgSet(var)  );
+    var.setVal(oldval);
+
+    var = sbModel.GetParametersOfInterest().first();
+    oldval = var.getVal();
+    sbModel.SetSnapshot( ROOT.RooArgSet(var) );
+
+    profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf());
+    profll.SetOneSidedDiscovery(False);
+    profll.SetOneSided(True);
+    hypoCalc = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel);
+    # profll.SetPrintLevel(2)
+
+
+    print('by hand',profll.Evaluate(data,ROOT.RooArgSet(var) ))
+
+    hypoCalc.SetToys(ntoys, ntoys)
+
+    sampler = hypoCalc.GetTestStatSampler();
+    sampler.SetTestStatistic(profll);
+
+    paramPointNull = ROOT.RooArgSet(sbModel.GetParametersOfInterest())
+    
+    htr = hypoCalc.GetHypoTest();
+    htr.SetPValueIsRightTail(True);
+    htr.SetBackgroundAsAlt(True);
+    htr.Print(); 
+    plot = ROOT.RooStats.HypoTestPlot(htr,100);
+    plot.SetLogYaxis(True);
+    plot.Draw();
+    ROOT.gPad.SaveAs("plot.png");
+
+import sys
+if __name__ == '__main__':
+    StandardHypoTestDemo(sys.argv[1],int(sys.argv[2]) if len(sys.argv) > 2 else 2000)
diff --git a/validation/xmlimport_input_bkg.json b/validation/xmlimport_input_bkg.json
new file mode 100644
index 0000000000..e515969261
--- /dev/null
+++ b/validation/xmlimport_input_bkg.json
@@ -0,0 +1,146 @@
+{
+    "channels": [
+        {
+            "name": "channel1",
+            "samples": [
+                {
+                    "data": [
+                        20.0,
+                        10.0
+                    ],
+                    "modifiers": [
+                        {
+                            "data": {
+                                "hi": 1.05,
+                                "lo": 0.95
+                            },
+                            "name": "syst1",
+                            "type": "normsys"
+                        },
+                        {
+                            "data": null,
+                            "name": "SigXsecOverSM",
+                            "type": "normfactor"
+                        }
+                    ],
+                    "name": "signal"
+                },
+                {
+                    "data": [
+                        100.0,
+                        0.0
+                    ],
+                    "modifiers": [
+                        {
+                            "data": null,
+                            "name": "lumi",
+                            "type": "lumi"
+                        },
+                        {
+                            "data": [
+                                5.000000074505806,
+                                0.0
+                            ],
+                            "name": "staterror_channel1",
+                            "type": "staterror"
+                        },
+                        {
+                            "data": {
+                                "hi": 1.05,
+                                "lo": 0.95
+                            },
+                            "name": "syst2",
+                            "type": "normsys"
+                        }
+                    ],
+                    "name": "background1"
+                },
+                {
+                    "data": [
+                        0.0,
+                        100.0
+                    ],
+                    "modifiers": [
+                        {
+                            "data": null,
+                            "name": "lumi",
+                            "type": "lumi"
+                        },
+                        {
+                            "data": [
+                                0.0,
+                                10.0
+                            ],
+                            "name": "staterror_channel1",
+                            "type": "staterror"
+                        },
+                        {
+                            "data": {
+                                "hi": 1.05,
+                                "lo": 0.95
+                            },
+                            "name": "syst3",
+                            "type": "normsys"
+                        }
+                    ],
+                    "name": "background2"
+                }
+            ]
+        }
+    ],
+    "measurements": [
+        {
+            "config": {
+                "parameters": [
+                    {
+                        "auxdata": [
+                            1.0
+                        ],
+                        "bounds": [
+                            [
+                                0.5,
+                                1.5
+                            ]
+                        ],
+                        "fixed": true,
+                        "inits": [
+                            1.0
+                        ],
+                        "name": "lumi",
+                        "sigmas": [
+                            0.1
+                        ]
+                    },
+                    {
+                        "bounds": [
+                            [
+                                0.0,
+                                3.0
+                            ]
+                        ],
+                        "inits": [
+                            1.0
+                        ],
+                        "name": "SigXsecOverSM"
+                    },
+                    {
+                        "fixed": true,
+                        "name": "syst1"
+                    }
+                ],
+                "poi": "SigXsecOverSM"
+            },
+            "name": "GaussExample"
+        }
+    ],
+    "observations": [
+        {
+            "data": [
+                100.0,
+                100.0
+            ],
+            "name": "channel1"
+        }
+    ],
+    "version": "1.0.0"
+}

From 1af5697cc17508dc3b1fb347d03050b28ba37640 Mon Sep 17 00:00:00 2001
From: Lukas Heinrich <lukas.heinrich@gmail.com>
Date: Sun, 8 Nov 2020 18:27:15 +0100
Subject: [PATCH 03/30] better fix

---
 src/pyhf/infer/__init__.py         | 56 +++++------------
 src/pyhf/infer/calculators.py      | 48 ++++++++++++++-
 src/pyhf/infer/intervals.py        |  1 +
 tests/test_infer.py                | 18 +++---
 validation/StandardHypoTestDemo.py | 99 ++++++++++++++++++------------
 5 files changed, 133 insertions(+), 89 deletions(-)

diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
index 2195c0cf8c..8a2d159b79 100644
--- a/src/pyhf/infer/__init__.py
+++ b/src/pyhf/infer/__init__.py
@@ -141,57 +141,31 @@ def hypotest(
     teststat = calc.teststatistic(poi_test)
     sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)
 
-    CLsb = sig_plus_bkg_distribution.pvalue(teststat)
-    CLb = b_only_distribution.pvalue(teststat)
-    CLs = CLsb / CLb
-
-    tensorlib, _ = get_backend()
-    # Ensure that all CL values are 0-d tensors
-    CLsb, CLb, CLs = (
-        tensorlib.astensor(CLsb),
-        tensorlib.astensor(CLb),
-        tensorlib.astensor(CLs),
+    pvalues = CLsb_obs, CLb_obs, CLs_obs = calc.pvalues(
+        teststat, sig_plus_bkg_distribution, b_only_distribution
+    )
+    tb, _ = get_backend()
+    CLsb_obs, CLb_obs, CLs_obs = tuple(tb.astensor(x) for x in pvalues)
+    CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues(
+        sig_plus_bkg_distribution, b_only_distribution
     )
 
     is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0'
 
-    _returns = [CLsb if is_q0 else CLs]
+    _returns = [CLsb_obs if is_q0 else CLs_obs]
     if return_tail_probs:
         if is_q0:
-            _returns.append([CLb])
+            _returns.append([CLb_obs])
         else:
-            _returns.append([CLsb, CLb])
+            _returns.append([CLsb_obs, CLb_obs])
+
+    pvalues_exp = CLsb_exp if is_q0 else CLs_exp
     if return_expected_set:
-        CLs_exp = []
-        for n_sigma in [2, 1, 0, -1, -2]:
-
-            expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)
-
-            if is_q0:
-                # despite the name in this case this is the discovery p value
-                CLs = sig_plus_bkg_distribution.pvalue(expected_bonly_teststat)
-            else:
-                CLs = sig_plus_bkg_distribution.pvalue(
-                    expected_bonly_teststat
-                ) / b_only_distribution.pvalue(expected_bonly_teststat)
-            CLs_exp.append(tensorlib.astensor(CLs))
         if return_expected:
-            _returns.append(CLs_exp[2])
-        _returns.append(CLs_exp)
+            _returns.append(tb.astensor(pvalues_exp[2]))
+        _returns.append(pvalues_exp)
     elif return_expected:
-        n_sigma = 0
-        expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)
-
-        if is_q0:
-            # despite the name in this case this is the discovery p value
-            CLs = sig_plus_bkg_distribution.pvalue(expected_bonly_teststat)
-        else:
-            CLs = sig_plus_bkg_distribution.pvalue(
-                expected_bonly_teststat
-            ) / b_only_distribution.pvalue(expected_bonly_teststat)
-
-        _returns.append(tensorlib.astensor(CLs))
-
+        _returns.append(tb.astensor(pvalues_exp[2]))
     # Enforce a consistent return type of the observed CLs
     return tuple(_returns) if len(_returns) > 1 else _returns[0]
 
diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index aa98658305..4daa47bfe6 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -306,6 +306,23 @@ def _false_case():
             )
         return teststat
 
+    def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
+        CLsb = sig_plus_bkg_distribution.pvalue(teststat)
+        CLb = b_only_distribution.pvalue(teststat)
+        CLs = CLsb / CLb
+        return CLsb, CLb, CLs
+
+    def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
+        tb, _ = get_backend()
+        return tb.astensor(
+            [
+                self.pvalues(v, sig_plus_bkg_distribution, b_only_distribution)
+                for v in [
+                    b_only_distribution.expected_value(x) for x in [2, 1, 0, -1, -2]
+                ]
+            ]
+        ).T
+
 
 class EmpiricalDistribution:
     """
@@ -378,7 +395,11 @@ def pvalue(self, value):
         """
         tensorlib, _ = get_backend()
         return (
-            tensorlib.sum(tensorlib.where(self.samples >= value, 1, 0))
+            tensorlib.sum(
+                tensorlib.where(
+                    self.samples >= value, tensorlib.astensor(1), tensorlib.astensor(0)
+                )
+            )
             / tensorlib.shape(self.samples)[0]
         )
 
@@ -574,6 +595,31 @@ def distributions(self, poi_test, track_progress=None):
         b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat))
         return s_plus_b, b_only
 
+    def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
+        CLsb = sig_plus_bkg_distribution.pvalue(teststat)
+        CLb = b_only_distribution.pvalue(teststat)
+        CLs = CLsb / CLb
+        return CLsb, CLb, CLs
+
+    def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
+        tb, _ = get_backend()
+        pvalues = tb.astensor(
+            [
+                self.pvalues(
+                    tb.astensor(x), sig_plus_bkg_distribution, b_only_distribution
+                )
+                for x in b_only_distribution.samples
+            ]
+        )
+        import numpy as np
+
+        pvalues_exp = np.percentile(
+            tb.tolist(pvalues),
+            [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681],
+            axis=0,
+        )
+        return tb.astensor(pvalues_exp).T
+
     def teststatistic(self, poi_test):
         """
         Compute the test statistic for the observed data under the studied model.
diff --git a/src/pyhf/infer/intervals.py b/src/pyhf/infer/intervals.py
index f146f48dba..edb1a0f4cd 100644
--- a/src/pyhf/infer/intervals.py
+++ b/src/pyhf/infer/intervals.py
@@ -55,6 +55,7 @@ def upperlimit(data, model, scan, level=0.05, return_results=False):
     ]
     obs = tb.astensor([[r[0]] for r in results])
     exp = tb.astensor([[r[1][idx] for idx in range(5)] for r in results])
+
     result_arrary = tb.concatenate([obs, exp], axis=1).T
 
     # observed limit and the (0, +-1, +-2)sigma expected limits
diff --git a/tests/test_infer.py b/tests/test_infer.py
index 1070b0f64a..391854f675 100644
--- a/tests/test_infer.py
+++ b/tests/test_infer.py
@@ -351,16 +351,16 @@ def test_toy_calculator(tmpdir, hypotest_args):
     )
     assert qtilde_mu_bkg.samples.tolist() == pytest.approx(
         [
-            2.2664625238298868,
-            1.081660885985002,
-            2.757022090389057,
-            1.3835689306226868,
-            0.4707466955809423,
+            2.2664625749100082,
+            1.081660887453154,
+            2.7570218408936853,
+            1.3835691388297846,
+            0.4707467005909507,
             0.0,
-            3.7166483694865065,
-            3.8021896741039427,
-            5.114135403520493,
-            1.35111537136072,
+            3.7166483705294127,
+            3.8021896732709592,
+            5.114135391143066,
+            1.3511153731000718,
         ],
         1e-07,
     )
diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py
index e5551cde6c..58707ec95c 100644
--- a/validation/StandardHypoTestDemo.py
+++ b/validation/StandardHypoTestDemo.py
@@ -1,55 +1,78 @@
 import ROOT
+import numpy as np
+
 
 def StandardHypoTestDemo(
     infile,
     ntoys,
-    workspaceName = "combined",
-    modelSBName = "ModelConfig",
-    dataName = "obsData",
+    workspaceName="combined",
+    modelSBName="ModelConfig",
+    dataName="obsData",
 ):
 
-    file = ROOT.TFile.Open(infile);
-    w = file.Get(workspaceName);
-    sbModel = w.obj(modelSBName);
-    data = w.data(dataName);
-
-    bModel = sbModel.Clone();
-    bModel.SetName(modelSBName+"B_only");
-    var = (bModel.GetParametersOfInterest().first());
-    oldval = var.getVal();
-    var.setVal(0);
-    bModel.SetSnapshot( ROOT.RooArgSet(var)  );
-    var.setVal(oldval);
-
-    var = sbModel.GetParametersOfInterest().first();
-    oldval = var.getVal();
-    sbModel.SetSnapshot( ROOT.RooArgSet(var) );
-
-    profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf());
-    profll.SetOneSidedDiscovery(False);
-    profll.SetOneSided(True);
-    hypoCalc = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel);
-    # profll.SetPrintLevel(2)
+    file = ROOT.TFile.Open(infile)
+    w = file.Get(workspaceName)
+    sbModel = w.obj(modelSBName)
+    data = w.data(dataName)
+
+    bModel = sbModel.Clone()
+    bModel.SetName(modelSBName + "B_only")
+    var = bModel.GetParametersOfInterest().first()
+    oldval = var.getVal()
+    var.setVal(0)
+    bModel.SetSnapshot(ROOT.RooArgSet(var))
+    var.setVal(oldval)
 
+    var = sbModel.GetParametersOfInterest().first()
+    oldval = var.getVal()
+    sbModel.SetSnapshot(ROOT.RooArgSet(var))
 
-    print('by hand',profll.Evaluate(data,ROOT.RooArgSet(var) ))
+    profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf())
+    profll.SetOneSidedDiscovery(False)
+    profll.SetOneSided(True)
+    hypoCalc = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel)
+    # profll.SetPrintLevel(2)
+
+    print('by hand', profll.Evaluate(data, ROOT.RooArgSet(var)))
 
     hypoCalc.SetToys(ntoys, ntoys)
 
-    sampler = hypoCalc.GetTestStatSampler();
-    sampler.SetTestStatistic(profll);
+    sampler = hypoCalc.GetTestStatSampler()
+    sampler.SetTestStatistic(profll)
 
     paramPointNull = ROOT.RooArgSet(sbModel.GetParametersOfInterest())
-    
-    htr = hypoCalc.GetHypoTest();
-    htr.SetPValueIsRightTail(True);
-    htr.SetBackgroundAsAlt(True);
-    htr.Print(); 
-    plot = ROOT.RooStats.HypoTestPlot(htr,100);
-    plot.SetLogYaxis(True);
-    plot.Draw();
-    ROOT.gPad.SaveAs("plot.png");
+
+    htr = hypoCalc.GetHypoTest()
+    htr.SetPValueIsRightTail(True)
+    htr.SetBackgroundAsAlt(True)
+
+    cls_obs = htr.CLs()
+    ds = htr.GetNullDistribution()
+    db = htr.GetAltDistribution()
+
+    r = ROOT.RooStats.HypoTestResult()
+    r.SetPValueIsRightTail(True)
+    r.SetNullDistribution(ds)
+    r.SetAltDistribution(db)
+
+    values = [
+        1 / (r.SetTestStatisticData(v), r.CLs())[-1]
+        for v in ds.GetSamplingDistribution()
+    ]
+    print(values)
+    values = np.percentile(
+        values, [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681]
+    )
+    print(values)
+
+    htr.Print()
+    plot = ROOT.RooStats.HypoTestPlot(htr, 100)
+    plot.SetLogYaxis(True)
+    plot.Draw()
+    ROOT.gPad.SaveAs("plot.png")
+
 
 import sys
+
 if __name__ == '__main__':
-    StandardHypoTestDemo(sys.argv[1],int(sys.argv[2]) if len(sys.argv) > 2 else 2000)
+    StandardHypoTestDemo(sys.argv[1], int(sys.argv[2]) if len(sys.argv) > 2 else 2000)

From ba664ad2cbbec80a5f427f92f8a32dea504c7953 Mon Sep 17 00:00:00 2001
From: Lukas Heinrich <lukas.heinrich@gmail.com>
Date: Sun, 8 Nov 2020 19:01:20 +0100
Subject: [PATCH 04/30] better fix

---
 validation/StandardHypoTestDemo.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py
index 58707ec95c..ee617e5a86 100644
--- a/validation/StandardHypoTestDemo.py
+++ b/validation/StandardHypoTestDemo.py
@@ -40,13 +40,10 @@ def StandardHypoTestDemo(
     sampler = hypoCalc.GetTestStatSampler()
     sampler.SetTestStatistic(profll)
 
-    paramPointNull = ROOT.RooArgSet(sbModel.GetParametersOfInterest())
-
     htr = hypoCalc.GetHypoTest()
     htr.SetPValueIsRightTail(True)
     htr.SetBackgroundAsAlt(True)
 
-    cls_obs = htr.CLs()
     ds = htr.GetNullDistribution()
     db = htr.GetAltDistribution()
 

From 9edfdea3b5aba7a195cc2d3a734b952d2a33ba72 Mon Sep 17 00:00:00 2001
From: Lukas Heinrich <lukas.heinrich@gmail.com>
Date: Sun, 8 Nov 2020 19:19:16 +0100
Subject: [PATCH 05/30] better fix

---
 src/pyhf/infer/calculators.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 4daa47bfe6..49d516bde2 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -307,12 +307,14 @@ def _false_case():
         return teststat
 
     def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
+        '''calculate pvalues.'''
         CLsb = sig_plus_bkg_distribution.pvalue(teststat)
         CLb = b_only_distribution.pvalue(teststat)
         CLs = CLsb / CLb
         return CLsb, CLb, CLs
 
     def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
+        '''calculate expected pvalues.'''
         tb, _ = get_backend()
         return tb.astensor(
             [
@@ -596,12 +598,14 @@ def distributions(self, poi_test, track_progress=None):
         return s_plus_b, b_only
 
     def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
+        '''calculate pvalues.'''
         CLsb = sig_plus_bkg_distribution.pvalue(teststat)
         CLb = b_only_distribution.pvalue(teststat)
         CLs = CLsb / CLb
         return CLsb, CLb, CLs
 
     def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
+        '''calculate expected pvalues.'''
         tb, _ = get_backend()
         pvalues = tb.astensor(
             [

From 4f4a2585eb9396830f30394a896bd6726d18cdd6 Mon Sep 17 00:00:00 2001
From: Lukas Heinrich <lukas.heinrich@gmail.com>
Date: Sun, 8 Nov 2020 20:48:49 +0100
Subject: [PATCH 06/30] better fix

---
 src/pyhf/infer/__init__.py         | 4 +---
 validation/StandardHypoTestDemo.py | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
index 8a2d159b79..b2e457e29b 100644
--- a/src/pyhf/infer/__init__.py
+++ b/src/pyhf/infer/__init__.py
@@ -141,9 +141,7 @@ def hypotest(
     teststat = calc.teststatistic(poi_test)
     sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)
 
-    pvalues = CLsb_obs, CLb_obs, CLs_obs = calc.pvalues(
-        teststat, sig_plus_bkg_distribution, b_only_distribution
-    )
+    pvalues = calc.pvalues(teststat, sig_plus_bkg_distribution, b_only_distribution)
     tb, _ = get_backend()
     CLsb_obs, CLb_obs, CLs_obs = tuple(tb.astensor(x) for x in pvalues)
     CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues(
diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py
index ee617e5a86..372cf96e34 100644
--- a/validation/StandardHypoTestDemo.py
+++ b/validation/StandardHypoTestDemo.py
@@ -24,7 +24,6 @@ def StandardHypoTestDemo(
     var.setVal(oldval)
 
     var = sbModel.GetParametersOfInterest().first()
-    oldval = var.getVal()
     sbModel.SetSnapshot(ROOT.RooArgSet(var))
 
     profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf())

From 126da495fce154f5567de080b8d716b937b59f22 Mon Sep 17 00:00:00 2001
From: Lukas Heinrich <lukas.heinrich@gmail.com>
Date: Mon, 9 Nov 2020 13:36:41 +0100
Subject: [PATCH 07/30] add run_toys.py

---
 validation/run_toys.py | 61 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 validation/run_toys.py

diff --git a/validation/run_toys.py b/validation/run_toys.py
new file mode 100644
index 0000000000..8eb152f752
--- /dev/null
+++ b/validation/run_toys.py
@@ -0,0 +1,61 @@
+import ROOT
+import sys
+import json
+
+infile = sys.argv[1]
+ntoys = int(sys.argv[2]) if len(sys.argv) > 2 else 2000
+
+infile = ROOT.TFile.Open(infile)
+workspace = infile.Get("combined")
+data = workspace.data("obsData")
+
+sbModel = workspace.obj("ModelConfig")
+poi = sbModel.GetParametersOfInterest().first()
+
+sbModel.SetSnapshot(ROOT.RooArgSet(poi))
+
+bModel = sbModel.Clone()
+bModel.SetName("bonly")
+poi.setVal(0)
+bModel.SetSnapshot(ROOT.RooArgSet(poi))
+
+ac = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel)
+ac.SetToys(ntoys, ntoys)
+
+profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf())
+profll.SetOneSidedDiscovery(False)
+profll.SetOneSided(True)
+ac.GetTestStatSampler().SetTestStatistic(profll)
+
+calc = ROOT.RooStats.HypoTestInverter(ac)
+calc.SetConfidenceLevel(0.95)
+calc.UseCLs(True)
+
+npoints = 2
+
+calc.RunFixedScan(npoints, 1.0, 1.2)
+
+result = calc.GetInterval() 
+
+plot = ROOT.RooStats.HypoTestInverterPlot("plot", "plot", result)
+
+
+data = []
+for i in range(npoints):
+    d = {
+        'test_b': list(result.GetAltTestStatDist(i).GetSamplingDistribution()),
+        'test_s': list(result.GetNullTestStatDist(i).GetSamplingDistribution()),
+        'pvals': list(result.GetExpectedPValueDist(i).GetSamplingDistribution())
+    }
+    data.append(d)
+
+json.dump(data,open('scan.json','w'))
+
+c = ROOT.TCanvas()
+c.SetLogy(False)
+plot.Draw("OBS EXP CLb 2CL")
+c.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0,0.2)
+# c.GetYaxis().SetRangeUser(0,0.2)
+c.Draw()
+c.SaveAs('scan.pdf')
+

From 02d0a74b9523f66f0d68af5f2a38c4d96f8fc768 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 9 Nov 2020 12:37:17 +0000
Subject: [PATCH 08/30] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 validation/run_toys.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/validation/run_toys.py b/validation/run_toys.py
index 8eb152f752..39d4f269ca 100644
--- a/validation/run_toys.py
+++ b/validation/run_toys.py
@@ -35,7 +35,7 @@
 
 calc.RunFixedScan(npoints, 1.0, 1.2)
 
-result = calc.GetInterval() 
+result = calc.GetInterval()
 
 plot = ROOT.RooStats.HypoTestInverterPlot("plot", "plot", result)
 
@@ -45,17 +45,16 @@
     d = {
         'test_b': list(result.GetAltTestStatDist(i).GetSamplingDistribution()),
         'test_s': list(result.GetNullTestStatDist(i).GetSamplingDistribution()),
-        'pvals': list(result.GetExpectedPValueDist(i).GetSamplingDistribution())
+        'pvals': list(result.GetExpectedPValueDist(i).GetSamplingDistribution()),
     }
     data.append(d)
 
-json.dump(data,open('scan.json','w'))
+json.dump(data, open('scan.json', 'w'))
 
 c = ROOT.TCanvas()
 c.SetLogy(False)
 plot.Draw("OBS EXP CLb 2CL")
-c.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0,0.2)
+c.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0, 0.2)
 # c.GetYaxis().SetRangeUser(0,0.2)
 c.Draw()
 c.SaveAs('scan.pdf')
-

From 5f84190b392a2502f647bf0d735ca433c61d3de5 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Mon, 9 Nov 2020 22:16:28 -0600
Subject: [PATCH 09/30] Add docstring for asymptotic pvalues

---
 src/pyhf/infer/calculators.py | 40 ++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 49d516bde2..ad6d429402 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -211,8 +211,8 @@ def distributions(self, poi_test):
             >>> mu_test = 1.0
             >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(data, model, test_stat="qtilde")
             >>> _ = asymptotic_calculator.teststatistic(mu_test)
-            >>> qmu_sig, qmu_bkg = asymptotic_calculator.distributions(mu_test)
-            >>> qmu_sig.pvalue(mu_test), qmu_bkg.pvalue(mu_test)
+            >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
+            >>> sig_plus_bkg_dist.pvalue(mu_test), bkg_dist.pvalue(mu_test)
             (0.002192624107163899, 0.15865525393145707)
 
         Args:
@@ -307,7 +307,41 @@ def _false_case():
         return teststat
 
     def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
-        '''calculate pvalues.'''
+        r"""
+        Calculate the :math:`p`-values for the observed test statistic under the
+        signal + background and background-only model hypotheses.
+
+        Example:
+
+            >>> import pyhf
+            >>> pyhf.set_backend("numpy")
+            >>> model = pyhf.simplemodels.hepdata_like(
+            ...     signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0]
+            ... )
+            >>> observations = [51, 48]
+            >>> data = observations + model.config.auxdata
+            >>> mu_test = 1.0
+            >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(
+            ...     data, model, qtilde=True
+            ... )
+            >>> q_tilde = asymptotic_calculator.teststatistic(mu_test)
+            >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
+            >>> CLsb, CLb, CLs = asymptotic_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist)
+            >>> CLsb, CLb, CLs
+            (0.023325019427864607, 0.4441593996111411, 0.05251497423736956)
+
+        Args:
+            teststat (:obj:`tensor`): The test statistic.
+            sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+              The distribution for the signal + background hypothesis.
+            b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+              The distribution for the background-only hypothesis.
+
+        Returns:
+            Tuple (:obj:`float`): The :math:`p`-values for the test statistic
+            corresponding to the :math:`\mathrm{CL}_{s+b}`,
+            :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
+        """
         CLsb = sig_plus_bkg_distribution.pvalue(teststat)
         CLb = b_only_distribution.pvalue(teststat)
         CLs = CLsb / CLb

From 9c65eef75164a85a56a6a4d275608bb1d6fcd21a Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Mon, 9 Nov 2020 22:34:10 -0600
Subject: [PATCH 10/30] Add docstrings for expected_pvalues

---
 src/pyhf/infer/calculators.py | 37 ++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index ad6d429402..c7a8f772dd 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -348,7 +348,42 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
         return CLsb, CLb, CLs
 
     def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
-        '''calculate expected pvalues.'''
+        r"""
+        Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the
+        median significance of variations of the signal strength from the
+        background only hypothesis :math:`\left(\mu=0\right)` at
+        :math:`(-2,-1,0,1,2)\sigma`.
+
+        Example:
+
+            >>> import pyhf
+            >>> pyhf.set_backend("numpy")
+            >>> model = pyhf.simplemodels.hepdata_like(
+            ...     signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0]
+            ... )
+            >>> observations = [51, 48]
+            >>> data = observations + model.config.auxdata
+            >>> mu_test = 1.0
+            >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(
+            ...     data, model, qtilde=True
+            ... )
+            >>> _ = asymptotic_calculator.teststatistic(mu_test)
+            >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
+            >>> CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
+            >>> CLs_exp_band
+            [0.0026062609501074576, 0.01382005356161206, 0.06445320535890459, 0.23525643861460702, 0.573036205919389]
+
+        Args:
+            sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+              The distribution for the signal + background hypothesis.
+            b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+              The distribution for the background-only hypothesis.
+
+        Returns:
+            Tuple (:obj:`float`): The :math:`p`-values for the test statistic
+            corresponding to the :math:`\mathrm{CL}_{s+b}`,
+            :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
+        """
         tb, _ = get_backend()
         return tb.astensor(
             [

From b8a008230533247719e831d4b2626fb4434a773f Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Mon, 9 Nov 2020 22:39:06 -0600
Subject: [PATCH 11/30] more explicit var names

---
 src/pyhf/infer/calculators.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index c7a8f772dd..613772aad1 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -384,12 +384,14 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
             corresponding to the :math:`\mathrm{CL}_{s+b}`,
             :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
         """
+        # Calling pvalues is easier then repeating the CLs calculation here
         tb, _ = get_backend()
         return tb.astensor(
             [
-                self.pvalues(v, sig_plus_bkg_distribution, b_only_distribution)
-                for v in [
-                    b_only_distribution.expected_value(x) for x in [2, 1, 0, -1, -2]
+                self.pvalues(test_stat, sig_plus_bkg_distribution, b_only_distribution)
+                for test_stat in [
+                    b_only_distribution.expected_value(n_sigma)
+                    for n_sigma in [2, 1, 0, -1, -2]
                 ]
             ]
         ).T

From 904f6522cf9a95f5b72d2f494adab3b4b2d69d1a Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Mon, 9 Nov 2020 23:09:09 -0600
Subject: [PATCH 12/30] Add docstrings for emperical distributions methods

---
 src/pyhf/infer/calculators.py | 80 +++++++++++++++++++++++++++++++++--
 1 file changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 613772aad1..89c6c84f58 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -602,8 +602,8 @@ def distributions(self, poi_test, track_progress=None):
             >>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
             ...     data, model, ntoys=100, track_progress=False
             ... )
-            >>> qmu_sig, qmu_bkg = toy_calculator.distributions(mu_test)
-            >>> qmu_sig.pvalue(mu_test), qmu_bkg.pvalue(mu_test)
+            >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test)
+            >>> sig_plus_bkg_dist.pvalue(mu_test), bkg_dist.pvalue(mu_test)
             (0.14, 0.76)
 
         Args:
@@ -669,14 +669,86 @@ def distributions(self, poi_test, track_progress=None):
         return s_plus_b, b_only
 
     def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
-        '''calculate pvalues.'''
+        r"""
+        Calculate the :math:`p`-values for the observed test statistic under the
+        signal + background and background-only model hypotheses.
+
+        Example:
+
+            >>> import pyhf
+            >>> import numpy.random as random
+            >>> random.seed(0)
+            >>> pyhf.set_backend("numpy")
+            >>> model = pyhf.simplemodels.hepdata_like(
+            ...     signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0]
+            ... )
+            >>> observations = [51, 48]
+            >>> data = observations + model.config.auxdata
+            >>> mu_test = 1.0
+            >>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
+            ...     data, model, ntoys=100, track_progress=False
+            ... )
+            >>> q_tilde = toy_calculator.teststatistic(mu_test)
+            >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test)
+            >>> CLsb, CLb, CLs = toy_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist)
+            >>> CLsb, CLb, CLs
+            (0.01, 0.41, 0.024390243902439025)
+
+        Args:
+            teststat (:obj:`tensor`): The test statistic.
+            sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+              The distribution for the signal + background hypothesis.
+            b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+              The distribution for the background-only hypothesis.
+
+        Returns:
+            Tuple (:obj:`float`): The :math:`p`-values for the test statistic
+            corresponding to the :math:`\mathrm{CL}_{s+b}`,
+            :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
+        """
         CLsb = sig_plus_bkg_distribution.pvalue(teststat)
         CLb = b_only_distribution.pvalue(teststat)
         CLs = CLsb / CLb
         return CLsb, CLb, CLs
 
     def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
-        '''calculate expected pvalues.'''
+        r"""
+        Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the
+        median significance of variations of the signal strength from the
+        background only hypothesis :math:`\left(\mu=0\right)` at
+        :math:`(-2,-1,0,1,2)\sigma`.
+
+        Example:
+
+            >>> import pyhf
+            >>> import numpy.random as random
+            >>> random.seed(0)
+            >>> pyhf.set_backend("numpy")
+            >>> model = pyhf.simplemodels.hepdata_like(
+            ...     signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0]
+            ... )
+            >>> observations = [51, 48]
+            >>> data = observations + model.config.auxdata
+            >>> mu_test = 1.0
+            >>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
+            ...     data, model, ntoys=100, track_progress=False
+            ... )
+            >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test)
+            >>> CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
+            >>> CLs_exp_band
+            [0.0, 0.0, 0.06186224489795918, 0.2845003327965815, 1.0]
+
+        Args:
+            sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+              The distribution for the signal + background hypothesis.
+            b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+              The distribution for the background-only hypothesis.
+
+        Returns:
+            Tuple (:obj:`float`): The :math:`p`-values for the test statistic
+            corresponding to the :math:`\mathrm{CL}_{s+b}`,
+            :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
+        """
         tb, _ = get_backend()
         pvalues = tb.astensor(
             [

From a82dbb3078d767cbd89581af0ced23508d83f23a Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Mon, 9 Nov 2020 23:10:12 -0600
Subject: [PATCH 13/30] Note Issues for percentile in tensorlib

---
 src/pyhf/infer/calculators.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 89c6c84f58..8c705f7fe4 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -753,11 +753,15 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
         pvalues = tb.astensor(
             [
                 self.pvalues(
-                    tb.astensor(x), sig_plus_bkg_distribution, b_only_distribution
+                    tb.astensor(test_stat),
+                    sig_plus_bkg_distribution,
+                    b_only_distribution,
                 )
-                for x in b_only_distribution.samples
+                for test_stat in b_only_distribution.samples
             ]
         )
+        # TODO: Add percentile to tensorlib
+        # c.f. Issue #815, PR #817
         import numpy as np
 
         pvalues_exp = np.percentile(

From 99edeae539f17e1224c0f0d2829d4025c302ca57 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Mon, 9 Nov 2020 23:10:36 -0600
Subject: [PATCH 14/30] Make floats instead of tesnors for consistency

Check on this
---
 docs/api.rst                  | 1 +
 src/pyhf/infer/calculators.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index a7fa872067..e47f1256c6 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -121,6 +121,7 @@ Inference
 
 .. autosummary::
    :toctree: _generated/
+   :template: modifierclass.rst
 
    test_statistics.q0
    test_statistics.qmu
diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 8c705f7fe4..f6a013b969 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -768,8 +768,8 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
             tb.tolist(pvalues),
             [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681],
             axis=0,
-        )
-        return tb.astensor(pvalues_exp).T
+        ).T.tolist()
+        return pvalues_exp
 
     def teststatistic(self, poi_test):
         """

From 5ef53527ba3a3bf5bdde05b6ff03af24ec73973b Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Mon, 9 Nov 2020 23:23:16 -0600
Subject: [PATCH 15/30] Simplify and use more explicit naming

Change name from CLs_exp to CLs_exp_band to make clear not a singular
value
---
 src/pyhf/infer/__init__.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
index b2e457e29b..36881b8c0d 100644
--- a/src/pyhf/infer/__init__.py
+++ b/src/pyhf/infer/__init__.py
@@ -141,9 +141,13 @@ def hypotest(
     teststat = calc.teststatistic(poi_test)
     sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)
 
-    pvalues = calc.pvalues(teststat, sig_plus_bkg_distribution, b_only_distribution)
     tb, _ = get_backend()
-    CLsb_obs, CLb_obs, CLs_obs = tuple(tb.astensor(x) for x in pvalues)
+    CLsb_obs, CLb_obs, CLs_obs = tuple(
+        tb.astensor(pvalue)
+        for pvalue in calc.pvalues(
+            teststat, sig_plus_bkg_distribution, b_only_distribution
+        )
+    )
     CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues(
         sig_plus_bkg_distribution, b_only_distribution
     )
@@ -157,13 +161,13 @@ def hypotest(
         else:
             _returns.append([CLsb_obs, CLb_obs])
 
-    pvalues_exp = CLsb_exp if is_q0 else CLs_exp
+    pvalues_exp_band = CLsb_exp if is_q0 else CLs_exp
     if return_expected_set:
         if return_expected:
-            _returns.append(tb.astensor(pvalues_exp[2]))
-        _returns.append(pvalues_exp)
+            _returns.append(tb.astensor(pvalues_exp_band[2]))
+        _returns.append(pvalues_exp_band)
     elif return_expected:
-        _returns.append(tb.astensor(pvalues_exp[2]))
+        _returns.append(tb.astensor(pvalues_exp_band[2]))
     # Enforce a consistent return type of the observed CLs
     return tuple(_returns) if len(_returns) > 1 else _returns[0]
 

From 8d4e8163ee514b60604c1073abcacf3e6040d741 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Mon, 9 Nov 2020 23:32:39 -0600
Subject: [PATCH 16/30] Correct distribution type

---
 src/pyhf/infer/calculators.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index f6a013b969..f4fc723d97 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -696,9 +696,9 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
 
         Args:
             teststat (:obj:`tensor`): The test statistic.
-            sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+            sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
               The distribution for the signal + background hypothesis.
-            b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+            b_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
               The distribution for the background-only hypothesis.
 
         Returns:
@@ -739,9 +739,9 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
             [0.0, 0.0, 0.06186224489795918, 0.2845003327965815, 1.0]
 
         Args:
-            sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+            sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
               The distribution for the signal + background hypothesis.
-            b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+            b_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
               The distribution for the background-only hypothesis.
 
         Returns:

From affac46259b03302d5982e98e9d1436635f0bbf7 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Tue, 10 Nov 2020 01:20:48 -0600
Subject: [PATCH 17/30] Make clear purpose of percentiles

---
 src/pyhf/infer/calculators.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index f4fc723d97..847a48003a 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -764,12 +764,13 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
         # c.f. Issue #815, PR #817
         import numpy as np
 
-        pvalues_exp = np.percentile(
+        _percentiles = [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681]
+        pvalues_exp_band = np.percentile(
             tb.tolist(pvalues),
-            [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681],
+            _percentiles,
             axis=0,
         ).T.tolist()
-        return pvalues_exp
+        return pvalues_exp_band
 
     def teststatistic(self, poi_test):
         """

From 03c444ab4597f5a739e53ecd29deb05e10e2610c Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Tue, 10 Nov 2020 02:33:13 -0600
Subject: [PATCH 18/30] Add pyhf version to hypo test demo

---
 validation/StandardHypoTestDemo.py | 51 ++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py
index 372cf96e34..851fd6eec0 100644
--- a/validation/StandardHypoTestDemo.py
+++ b/validation/StandardHypoTestDemo.py
@@ -1,5 +1,9 @@
-import ROOT
+import sys
 import numpy as np
+import json
+import matplotlib.pyplot as plt
+import ROOT
+import pyhf
 
 
 def StandardHypoTestDemo(
@@ -68,7 +72,50 @@ def StandardHypoTestDemo(
     ROOT.gPad.SaveAs("plot.png")
 
 
-import sys
+def pyhf_version(ntoys=5000, seed=0):
+    np.random.seed(seed)
+    with open("validation/xmlimport_input_bkg.json") as ws_json:
+        workspace = pyhf.Workspace(json.load(ws_json))
+
+    model = workspace.model()
+    data = workspace.data(model)
+    toy_calculator = pyhf.infer.utils.create_calculator(
+        "toybased",
+        data,
+        model,
+        ntoys=ntoys,
+    )
+    test_mu = 1.0
+    sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(test_mu)
+    q_tilde = toy_calculator.teststatistic(test_mu)
+
+    bins = np.linspace(0, 8, 100)
+
+    fig, ax = plt.subplots()
+    ax.hist(
+        bkg_dist.samples / 2.0,
+        alpha=0.2,
+        bins=bins,
+        density=True,
+        label=r"$f(\tilde{q}|0)$ Background",
+    )
+    ax.hist(
+        sig_plus_bkg_dist.samples / 2.0,
+        alpha=0.2,
+        bins=bins,
+        density=True,
+        label=r"$f(\tilde{q}|1)$ Signal",
+    )
+    ax.axvline(q_tilde / 2, color="black", label="Observed test statistic")
+    ax.semilogy()
+
+    ax.set_xlabel(r"$\tilde{q}$")
+    ax.set_ylabel(r"$f(\tilde{q}|\mu')$")
+    ax.legend(loc="best")
+
+    fig.savefig("pyhf_version.png")
+
 
 if __name__ == '__main__':
     StandardHypoTestDemo(sys.argv[1], int(sys.argv[2]) if len(sys.argv) > 2 else 2000)
+    # pyhf_version()

From 1cbda0027b53dbf9b1597b2de6e5d26798858e58 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Tue, 10 Nov 2020 11:41:39 -0600
Subject: [PATCH 19/30] add notes on test stat definition

---
 validation/StandardHypoTestDemo.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py
index 851fd6eec0..ce2b68aae6 100644
--- a/validation/StandardHypoTestDemo.py
+++ b/validation/StandardHypoTestDemo.py
@@ -92,6 +92,7 @@ def pyhf_version(ntoys=5000, seed=0):
     bins = np.linspace(0, 8, 100)
 
     fig, ax = plt.subplots()
+    # Compare to ROOT's choice of test stat being NLL instead of 2*NLL
     ax.hist(
         bkg_dist.samples / 2.0,
         alpha=0.2,
@@ -117,5 +118,7 @@ def pyhf_version(ntoys=5000, seed=0):
 
 
 if __name__ == '__main__':
-    StandardHypoTestDemo(sys.argv[1], int(sys.argv[2]) if len(sys.argv) > 2 else 2000)
+    StandardHypoTestDemo(
+        infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
+    )
     # pyhf_version()

From 3b93c1205d0fe7d253e7bdc4eeb099d4c4a6acf0 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Tue, 10 Nov 2020 11:55:22 -0600
Subject: [PATCH 20/30] Restructure to allow multiple runs

---
 validation/StandardHypoTestDemo.py |  2 +-
 validation/run_toys.py             | 88 ++++++++++++++++--------------
 2 files changed, 48 insertions(+), 42 deletions(-)

diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py
index ce2b68aae6..89a4fb8cac 100644
--- a/validation/StandardHypoTestDemo.py
+++ b/validation/StandardHypoTestDemo.py
@@ -117,7 +117,7 @@ def pyhf_version(ntoys=5000, seed=0):
     fig.savefig("pyhf_version.png")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     StandardHypoTestDemo(
         infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
     )
diff --git a/validation/run_toys.py b/validation/run_toys.py
index 39d4f269ca..b5f7f2be8a 100644
--- a/validation/run_toys.py
+++ b/validation/run_toys.py
@@ -2,59 +2,65 @@
 import sys
 import json
 
-infile = sys.argv[1]
-ntoys = int(sys.argv[2]) if len(sys.argv) > 2 else 2000
 
-infile = ROOT.TFile.Open(infile)
-workspace = infile.Get("combined")
-data = workspace.data("obsData")
+def run_toys_ROOT(infile, ntoys):
+    infile = ROOT.TFile.Open(infile)
+    workspace = infile.Get("combined")
+    data = workspace.data("obsData")
 
-sbModel = workspace.obj("ModelConfig")
-poi = sbModel.GetParametersOfInterest().first()
+    sbModel = workspace.obj("ModelConfig")
+    poi = sbModel.GetParametersOfInterest().first()
 
-sbModel.SetSnapshot(ROOT.RooArgSet(poi))
+    sbModel.SetSnapshot(ROOT.RooArgSet(poi))
 
-bModel = sbModel.Clone()
-bModel.SetName("bonly")
-poi.setVal(0)
-bModel.SetSnapshot(ROOT.RooArgSet(poi))
+    bModel = sbModel.Clone()
+    bModel.SetName("bonly")
+    poi.setVal(0)
+    bModel.SetSnapshot(ROOT.RooArgSet(poi))
 
-ac = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel)
-ac.SetToys(ntoys, ntoys)
+    ac = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel)
+    ac.SetToys(ntoys, ntoys)
 
-profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf())
-profll.SetOneSidedDiscovery(False)
-profll.SetOneSided(True)
-ac.GetTestStatSampler().SetTestStatistic(profll)
+    profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf())
+    profll.SetOneSidedDiscovery(False)
+    profll.SetOneSided(True)
+    ac.GetTestStatSampler().SetTestStatistic(profll)
 
-calc = ROOT.RooStats.HypoTestInverter(ac)
-calc.SetConfidenceLevel(0.95)
-calc.UseCLs(True)
+    calc = ROOT.RooStats.HypoTestInverter(ac)
+    calc.SetConfidenceLevel(0.95)
+    calc.UseCLs(True)
 
-npoints = 2
+    npoints = 2
 
-calc.RunFixedScan(npoints, 1.0, 1.2)
+    calc.RunFixedScan(npoints, 1.0, 1.2)
 
-result = calc.GetInterval()
+    result = calc.GetInterval()
 
-plot = ROOT.RooStats.HypoTestInverterPlot("plot", "plot", result)
+    plot = ROOT.RooStats.HypoTestInverterPlot("plot", "plot", result)
 
+    data = []
+    for i in range(npoints):
+        d = {
+            "test_b": list(result.GetAltTestStatDist(i).GetSamplingDistribution()),
+            "test_s": list(result.GetNullTestStatDist(i).GetSamplingDistribution()),
+            "pvals": list(result.GetExpectedPValueDist(i).GetSamplingDistribution()),
+        }
+        data.append(d)
 
-data = []
-for i in range(npoints):
-    d = {
-        'test_b': list(result.GetAltTestStatDist(i).GetSamplingDistribution()),
-        'test_s': list(result.GetNullTestStatDist(i).GetSamplingDistribution()),
-        'pvals': list(result.GetExpectedPValueDist(i).GetSamplingDistribution()),
-    }
-    data.append(d)
+    json.dump(data, open("scan.json", "w"))
 
-json.dump(data, open('scan.json', 'w'))
+    canvas = ROOT.TCanvas()
+    canvas.SetLogy(False)
+    plot.Draw("OBS EXP CLb 2CL")
+    canvas.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0, 0.2)
+    canvas.Draw()
+    extensions = ["pdf", "png"]
+    for ext in extensions:
+        canvas.SaveAs(f"poi_scan_ROOT.{ext}")
 
-c = ROOT.TCanvas()
-c.SetLogy(False)
-plot.Draw("OBS EXP CLb 2CL")
-c.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0, 0.2)
-# c.GetYaxis().SetRangeUser(0,0.2)
-c.Draw()
-c.SaveAs('scan.pdf')
+
+if __name__ == "__main__":
+    run_toys_ROOT(
+        infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
+    )
+    # run_toys_pyhf()

From 4933911dfbaa2e96569b8a7d0a24b4eb0b9c8037 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Thu, 12 Nov 2020 03:18:24 -0600
Subject: [PATCH 21/30] Add temp for debugging

---
 validation/run_toys.py | 51 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 5 deletions(-)

diff --git a/validation/run_toys.py b/validation/run_toys.py
index b5f7f2be8a..965a709772 100644
--- a/validation/run_toys.py
+++ b/validation/run_toys.py
@@ -1,6 +1,10 @@
-import ROOT
 import sys
 import json
+import numpy as np
+import matplotlib.pyplot as plt
+import pyhf
+import pyhf.contrib.viz.brazil as brazil
+import ROOT
 
 
 def run_toys_ROOT(infile, ntoys):
@@ -59,8 +63,45 @@ def run_toys_ROOT(infile, ntoys):
         canvas.SaveAs(f"poi_scan_ROOT.{ext}")
 
 
+def run_toys_pyhf(ntoys=2_000, seed=0):
+    np.random.seed(seed)
+    # with open("validation/xmlimport_input_bkg.json") as ws_json:
+    with open("debug/issue_workpace/issue_ws.json") as ws_json:
+        workspace = pyhf.Workspace(json.load(ws_json))
+
+    model = workspace.model()
+    data = workspace.data(model)
+
+    n_points = 4
+    test_mus = np.linspace(1.0, 1.2, n_points)
+    fit_results = [
+        pyhf.infer.hypotest(
+            mu, data, model, return_expected_set=True, calctype="toybased", ntoys=ntoys
+        )
+        for mu in test_mus
+    ]
+
+    fig, ax = plt.subplots()
+    brazil.plot_results(ax, test_mus, fit_results)
+    ax.set_xlabel(r"$\mu$")
+    ax.set_ylabel(r"$\mathrm{CL}_{s}$")
+    _buffer = 0.02
+    ax.set_xlim(1.0 - _buffer, 1.2 + _buffer)
+    ax.set_ylim(0.0, 0.2)
+
+    extensions = ["pdf", "png"]
+    for ext in extensions:
+        fig.savefig(f"poi_scan_pyhf.{ext}")
+
+    ax.set_ylim(1e-3, 0.2)
+    ax.semilogy()
+
+    for ext in extensions:
+        fig.savefig(f"poi_scan_logy_pyhf.{ext}")
+
+
 if __name__ == "__main__":
-    run_toys_ROOT(
-        infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
-    )
-    # run_toys_pyhf()
+    # run_toys_ROOT(
+    #     infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
+    # )
+    run_toys_pyhf(ntoys=20_000)

From 4f0d43090e196cbb38799a472540aa16f35fe52c Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Wed, 23 Dec 2020 18:45:22 -0600
Subject: [PATCH 22/30] Mark off non-asymptotic calculators in README

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index 19fbef79aa..a9ba3f56f5 100644
--- a/README.rst
+++ b/README.rst
@@ -116,6 +116,7 @@ Implemented variations:
   - ☑ ShapeFactor
   - ☑ StatError
   - ☑ Lumi Uncertainty
+  - ☑ Non-asymptotic calculators
 
 Computational Backends:
   - ☑ NumPy
@@ -134,7 +135,6 @@ Todo
 ----
 
 -  ☐ StatConfig
--  ☐ Non-asymptotic calculators
 
 results obtained from this package are validated against output computed
 from HistFactory workspaces

From e66319d493cc79e57b3a1476f83c9ae239e2e965 Mon Sep 17 00:00:00 2001
From: Giordon Stark <kratsg@gmail.com>
Date: Mon, 18 Jan 2021 19:57:08 -0500
Subject: [PATCH 23/30] fix flake

---
 validation/run_toys.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validation/run_toys.py b/validation/run_toys.py
index 965a709772..c8f6b6bb7f 100644
--- a/validation/run_toys.py
+++ b/validation/run_toys.py
@@ -1,4 +1,3 @@
-import sys
 import json
 import numpy as np
 import matplotlib.pyplot as plt
@@ -101,6 +100,7 @@ def run_toys_pyhf(ntoys=2_000, seed=0):
 
 
 if __name__ == "__main__":
+    # import sys
     # run_toys_ROOT(
     #     infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
     # )

From bd3d3bf1aa0e6be9e0f9684c250f2f78cda4b3ab Mon Sep 17 00:00:00 2001
From: Giordon Stark <kratsg@gmail.com>
Date: Mon, 18 Jan 2021 20:21:31 -0500
Subject: [PATCH 24/30] fix validation

---
 tests/test_validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_validation.py b/tests/test_validation.py
index 481f349680..5c8363ea42 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -124,7 +124,7 @@ def expected_result_1bin_shapesys_q0():
 @pytest.fixture(scope='module')
 def expected_result_1bin_shapesys_q0_toys():
     expected_result = {
-        "exp": [0.0, 0.0005, 0.0145, 0.1205, 0.403],
+        "exp": [0.0, 0.0005, 0.0145, 0.1205, 0.402761],
         "obs": 0.005,
     }
     return expected_result

From 262d1325fe6d66166b913b87bb526b9a95887716 Mon Sep 17 00:00:00 2001
From: Giordon Stark <kratsg@gmail.com>
Date: Mon, 18 Jan 2021 20:29:57 -0500
Subject: [PATCH 25/30] fix up the transpose

---
 src/pyhf/infer/__init__.py    | 4 +++-
 src/pyhf/infer/calculators.py | 6 +++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
index 36881b8c0d..a45d782cc1 100644
--- a/src/pyhf/infer/__init__.py
+++ b/src/pyhf/infer/__init__.py
@@ -161,7 +161,9 @@ def hypotest(
         else:
             _returns.append([CLsb_obs, CLb_obs])
 
-    pvalues_exp_band = CLsb_exp if is_q0 else CLs_exp
+    pvalues_exp_band = [
+        tb.astensor(pvalue) for pvalue in (CLsb_exp if is_q0 else CLs_exp)
+    ]
     if return_expected_set:
         if return_expected:
             _returns.append(tb.astensor(pvalues_exp_band[2]))
diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 847a48003a..1292a834b0 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -386,15 +386,15 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
         """
         # Calling pvalues is easier then repeating the CLs calculation here
         tb, _ = get_backend()
-        return tb.astensor(
-            [
+        return zip(
+            *[
                 self.pvalues(test_stat, sig_plus_bkg_distribution, b_only_distribution)
                 for test_stat in [
                     b_only_distribution.expected_value(n_sigma)
                     for n_sigma in [2, 1, 0, -1, -2]
                 ]
             ]
-        ).T
+        )
 
 
 class EmpiricalDistribution:

From b2ce8a39fb704b966c5c2e20531583694ae8e677 Mon Sep 17 00:00:00 2001
From: Giordon Stark <kratsg@gmail.com>
Date: Mon, 18 Jan 2021 20:47:15 -0500
Subject: [PATCH 26/30] fix up doctest

---
 src/pyhf/infer/calculators.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 1292a834b0..4cc14752c9 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -322,7 +322,7 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
             >>> data = observations + model.config.auxdata
             >>> mu_test = 1.0
             >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(
-            ...     data, model, qtilde=True
+            ...     data, model, test_stat="qtilde"
             ... )
             >>> q_tilde = asymptotic_calculator.teststatistic(mu_test)
             >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
@@ -365,11 +365,11 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
             >>> data = observations + model.config.auxdata
             >>> mu_test = 1.0
             >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(
-            ...     data, model, qtilde=True
+            ...     data, model, test_stat="qtilde"
             ... )
             >>> _ = asymptotic_calculator.teststatistic(mu_test)
             >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
-            >>> CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
+            >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
             >>> CLs_exp_band
             [0.0026062609501074576, 0.01382005356161206, 0.06445320535890459, 0.23525643861460702, 0.573036205919389]
 
@@ -386,14 +386,21 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
         """
         # Calling pvalues is easier then repeating the CLs calculation here
         tb, _ = get_backend()
-        return zip(
-            *[
-                self.pvalues(test_stat, sig_plus_bkg_distribution, b_only_distribution)
-                for test_stat in [
-                    b_only_distribution.expected_value(n_sigma)
-                    for n_sigma in [2, 1, 0, -1, -2]
-                ]
-            ]
+        return list(
+            map(
+                list,
+                zip(
+                    *[
+                        self.pvalues(
+                            test_stat, sig_plus_bkg_distribution, b_only_distribution
+                        )
+                        for test_stat in [
+                            b_only_distribution.expected_value(n_sigma)
+                            for n_sigma in [2, 1, 0, -1, -2]
+                        ]
+                    ]
+                ),
+            )
         )
 
 
@@ -734,7 +741,7 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
             ...     data, model, ntoys=100, track_progress=False
             ... )
             >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test)
-            >>> CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
+            >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
             >>> CLs_exp_band
             [0.0, 0.0, 0.06186224489795918, 0.2845003327965815, 1.0]
 

From 169a46ca0d17be7934dbf6091d69ab5df20afcce Mon Sep 17 00:00:00 2001
From: Giordon Stark <kratsg@gmail.com>
Date: Mon, 18 Jan 2021 21:09:14 -0500
Subject: [PATCH 27/30] update learn notebook for calculators to use the new
 API

---
 .../notebooks/learn/UsingCalculators.ipynb    | 101 ++++++++++++++----
 1 file changed, 80 insertions(+), 21 deletions(-)

diff --git a/docs/examples/notebooks/learn/UsingCalculators.ipynb b/docs/examples/notebooks/learn/UsingCalculators.ipynb
index db7e9e0fbd..f78f33ef05 100644
--- a/docs/examples/notebooks/learn/UsingCalculators.ipynb
+++ b/docs/examples/notebooks/learn/UsingCalculators.ipynb
@@ -206,6 +206,66 @@
     "p_expected"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "However, these sorts of steps are somewhat time-consuming and lengthy, and depending on the calculator chosen, may differ a little bit. The calculator API also serves to harmonize the extraction of the observed $p$-values:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CL_sb = 0.08389430261678055\n",
+      "CL_b = 0.5\n",
+      "CL_s = CL_sb / CL_b = 0.1677886052335611\n"
+     ]
+    }
+   ],
+   "source": [
+    "p_sb, p_b, p_s = asymp_calc.pvalues(teststat, sb_dist, b_dist)\n",
+    "\n",
+    "print(f'CL_sb = {p_sb}')\n",
+    "print(f'CL_b = {p_b}')\n",
+    "print(f'CL_s = CL_sb / CL_b = {p_s}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "and the expected $p$-values:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "exp. CL_sb = [0.0003632946734314658, 0.008671732658283807, 0.08389430261678055, 0.3522160809113284, 0.7325868885677684]\n",
+      "exp. CL_b = [0.022750131948179195, 0.15865525393145707, 0.5, 0.8413447460685429, 0.9772498680518208]\n",
+      "exp. CL_s = CL_sb / CL_b = [0.01596890401598493, 0.05465770873260968, 0.1677886052335611, 0.4186346709326618, 0.7496413276864433]\n"
+     ]
+    }
+   ],
+   "source": [
+    "p_exp_sb, p_exp_b, p_exp_s = asymp_calc.expected_pvalues(sb_dist, b_dist)\n",
+    "\n",
+    "print(f'exp. CL_sb = {p_exp_sb}')\n",
+    "print(f'exp. CL_b = {p_exp_b}')\n",
+    "print(f'exp. CL_s = CL_sb / CL_b = {p_exp_s}')"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -219,7 +279,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,7 +297,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -255,7 +315,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -264,7 +324,7 @@
        "array(1.90259087)"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -287,7 +347,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -311,7 +371,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -325,9 +385,7 @@
     }
    ],
    "source": [
-    "p_sb = sb_dist.pvalue(teststat)\n",
-    "p_b = b_dist.pvalue(teststat)\n",
-    "p_s = p_sb / p_b\n",
+    "p_sb, p_b, p_s = asymp_calc.pvalues(teststat, sb_dist, b_dist)\n",
     "\n",
     "print(f'CL_sb = {p_sb}')\n",
     "print(f'CL_b = {p_b}')\n",
@@ -343,24 +401,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "[0.0, 0.047058823529411764, 0.15, 0.3758865248226951, 1.0]"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "exp. CL_sb = [0.0, 0.008, 0.084, 0.318, 1.0]\n",
+      "exp. CL_b = [0.024, 0.17, 0.52, 0.846, 1.0]\n",
+      "exp. CL_s = CL_sb / CL_b = [0.0, 0.047058823529411764, 0.16153846153846155, 0.3758865248226951, 1.0]\n"
+     ]
     }
    ],
    "source": [
-    "teststat_expected = [b_dist.expected_value(i) for i in [2, 1, 0, -1, -2]]\n",
-    "p_expected = [sb_dist.pvalue(t) / b_dist.pvalue(t) for t in teststat_expected]\n",
-    "p_expected"
+    "p_exp_sb, p_exp_b, p_exp_s = asymp_calc.expected_pvalues(sb_dist, b_dist)\n",
+    "\n",
+    "print(f'exp. CL_sb = {p_exp_sb}')\n",
+    "print(f'exp. CL_b = {p_exp_b}')\n",
+    "print(f'exp. CL_s = CL_sb / CL_b = {p_exp_s}')"
    ]
   }
  ],

From d5b4a4b328cf968ef6cc22bc82b019c4661580e4 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Wed, 20 Jan 2021 01:32:58 -0600
Subject: [PATCH 28/30] Use bkg_only to be consistent with sig_plus_bkg naming

---
 src/pyhf/infer/__init__.py    |  6 +++---
 src/pyhf/infer/calculators.py | 28 ++++++++++++++--------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
index a45d782cc1..36449407b5 100644
--- a/src/pyhf/infer/__init__.py
+++ b/src/pyhf/infer/__init__.py
@@ -139,17 +139,17 @@ def hypotest(
     )
 
     teststat = calc.teststatistic(poi_test)
-    sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)
+    sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(poi_test)
 
     tb, _ = get_backend()
     CLsb_obs, CLb_obs, CLs_obs = tuple(
         tb.astensor(pvalue)
         for pvalue in calc.pvalues(
-            teststat, sig_plus_bkg_distribution, b_only_distribution
+            teststat, sig_plus_bkg_distribution, bkg_only_distribution
         )
     )
     CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues(
-        sig_plus_bkg_distribution, b_only_distribution
+        sig_plus_bkg_distribution, bkg_only_distribution
     )
 
     is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0'
diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 4cc14752c9..6cd398e85e 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -306,7 +306,7 @@ def _false_case():
             )
         return teststat
 
-    def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
+    def pvalues(self, teststat, sig_plus_bkg_distribution, bkg_only_distribution):
         r"""
         Calculate the :math:`p`-values for the observed test statistic under the
         signal + background and background-only model hypotheses.
@@ -334,7 +334,7 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
             teststat (:obj:`tensor`): The test statistic.
             sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
               The distribution for the signal + background hypothesis.
-            b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+            bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
               The distribution for the background-only hypothesis.
 
         Returns:
@@ -343,11 +343,11 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
             :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
         """
         CLsb = sig_plus_bkg_distribution.pvalue(teststat)
-        CLb = b_only_distribution.pvalue(teststat)
+        CLb = bkg_only_distribution.pvalue(teststat)
         CLs = CLsb / CLb
         return CLsb, CLb, CLs
 
-    def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
+    def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution):
         r"""
         Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the
         median significance of variations of the signal strength from the
@@ -376,7 +376,7 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
         Args:
             sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
               The distribution for the signal + background hypothesis.
-            b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
+            bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
               The distribution for the background-only hypothesis.
 
         Returns:
@@ -392,10 +392,10 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
                 zip(
                     *[
                         self.pvalues(
-                            test_stat, sig_plus_bkg_distribution, b_only_distribution
+                            test_stat, sig_plus_bkg_distribution, bkg_only_distribution
                         )
                         for test_stat in [
-                            b_only_distribution.expected_value(n_sigma)
+                            bkg_only_distribution.expected_value(n_sigma)
                             for n_sigma in [2, 1, 0, -1, -2]
                         ]
                     ]
@@ -675,7 +675,7 @@ def distributions(self, poi_test, track_progress=None):
         b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat))
         return s_plus_b, b_only
 
-    def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
+    def pvalues(self, teststat, sig_plus_bkg_distribution, bkg_only_distribution):
         r"""
         Calculate the :math:`p`-values for the observed test statistic under the
         signal + background and background-only model hypotheses.
@@ -705,7 +705,7 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
             teststat (:obj:`tensor`): The test statistic.
             sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
               The distribution for the signal + background hypothesis.
-            b_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
+            bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
               The distribution for the background-only hypothesis.
 
         Returns:
@@ -714,11 +714,11 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
             :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
         """
         CLsb = sig_plus_bkg_distribution.pvalue(teststat)
-        CLb = b_only_distribution.pvalue(teststat)
+        CLb = bkg_only_distribution.pvalue(teststat)
         CLs = CLsb / CLb
         return CLsb, CLb, CLs
 
-    def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
+    def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution):
         r"""
         Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the
         median significance of variations of the signal strength from the
@@ -748,7 +748,7 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
         Args:
             sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
               The distribution for the signal + background hypothesis.
-            b_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
+            bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
               The distribution for the background-only hypothesis.
 
         Returns:
@@ -762,9 +762,9 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
                 self.pvalues(
                     tb.astensor(test_stat),
                     sig_plus_bkg_distribution,
-                    b_only_distribution,
+                    bkg_only_distribution,
                 )
-                for test_stat in b_only_distribution.samples
+                for test_stat in bkg_only_distribution.samples
             ]
         )
         # TODO: Add percentile to tensorlib

From b8207b9df0697dd2c4db0231221c1c990dbd71ff Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Wed, 20 Jan 2021 01:41:42 -0600
Subject: [PATCH 29/30] Uncomment pyhf bits from validation scripts

---
 validation/StandardHypoTestDemo.py |  2 +-
 validation/run_toys.py             | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py
index 89a4fb8cac..53f52938a0 100644
--- a/validation/StandardHypoTestDemo.py
+++ b/validation/StandardHypoTestDemo.py
@@ -121,4 +121,4 @@ def pyhf_version(ntoys=5000, seed=0):
     StandardHypoTestDemo(
         infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
     )
-    # pyhf_version()
+    pyhf_version()
diff --git a/validation/run_toys.py b/validation/run_toys.py
index c8f6b6bb7f..6941797403 100644
--- a/validation/run_toys.py
+++ b/validation/run_toys.py
@@ -4,6 +4,7 @@
 import pyhf
 import pyhf.contrib.viz.brazil as brazil
 import ROOT
+import sys
 
 
 def run_toys_ROOT(infile, ntoys):
@@ -100,8 +101,7 @@ def run_toys_pyhf(ntoys=2_000, seed=0):
 
 
 if __name__ == "__main__":
-    # import sys
-    # run_toys_ROOT(
-    #     infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
-    # )
-    run_toys_pyhf(ntoys=20_000)
+    run_toys_ROOT(
+        infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000
+    )
+    run_toys_pyhf(ntoys=2_000)

From 26ada72bc35d46f3746ef8a8aecf0755d4d779b1 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Wed, 20 Jan 2021 02:24:33 -0600
Subject: [PATCH 30/30] Make more explicit the percentiles are for Normal dist

---
 src/pyhf/infer/calculators.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 6cd398e85e..49aa11edb3 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -771,10 +771,11 @@ def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution):
         # c.f. Issue #815, PR #817
         import numpy as np
 
-        _percentiles = [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681]
+        # percentiles for -2, -1, 0, 1, 2 standard deviations of the Normal distribution
+        normal_percentiles = [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681]
         pvalues_exp_band = np.percentile(
             tb.tolist(pvalues),
-            _percentiles,
+            normal_percentiles,
             axis=0,
         ).T.tolist()
         return pvalues_exp_band