From 590b0672698251074961b79333d8f79859d96dc6 Mon Sep 17 00:00:00 2001 From: Lukas Heinrich Date: Fri, 30 Oct 2020 17:04:06 +0100 Subject: [PATCH 01/30] unify inits --- src/pyhf/infer/calculators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 37d3c8ec64..aa98658305 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -551,7 +551,7 @@ def distributions(self, poi_test, track_progress=None): poi_test, sample, self.pdf, - signal_pars, + self.init_pars, self.par_bounds, self.fixed_params, ) @@ -564,7 +564,7 @@ def distributions(self, poi_test, track_progress=None): poi_test, sample, self.pdf, - bkg_pars, + self.init_pars, self.par_bounds, self.fixed_params, ) From a8b578b76d0981c5c858f166453ffa488bdb0baa Mon Sep 17 00:00:00 2001 From: Lukas Heinrich Date: Wed, 4 Nov 2020 11:51:18 +0100 Subject: [PATCH 02/30] add ROOT cross-check --- validation/StandardHypoTestDemo.py | 55 +++++++++++ validation/xmlimport_input_bkg.json | 146 ++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 validation/StandardHypoTestDemo.py create mode 100644 validation/xmlimport_input_bkg.json diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py new file mode 100644 index 0000000000..e5551cde6c --- /dev/null +++ b/validation/StandardHypoTestDemo.py @@ -0,0 +1,55 @@ +import ROOT + +def StandardHypoTestDemo( + infile, + ntoys, + workspaceName = "combined", + modelSBName = "ModelConfig", + dataName = "obsData", +): + + file = ROOT.TFile.Open(infile); + w = file.Get(workspaceName); + sbModel = w.obj(modelSBName); + data = w.data(dataName); + + bModel = sbModel.Clone(); + bModel.SetName(modelSBName+"B_only"); + var = (bModel.GetParametersOfInterest().first()); + oldval = var.getVal(); + var.setVal(0); + bModel.SetSnapshot( ROOT.RooArgSet(var) ); + var.setVal(oldval); + + var = sbModel.GetParametersOfInterest().first(); + oldval = var.getVal(); + sbModel.SetSnapshot( ROOT.RooArgSet(var) ); + + profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf()); + profll.SetOneSidedDiscovery(False); + profll.SetOneSided(True); + hypoCalc = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel); + # profll.SetPrintLevel(2) + + + print('by hand',profll.Evaluate(data,ROOT.RooArgSet(var) )) + + hypoCalc.SetToys(ntoys, ntoys) + + sampler = hypoCalc.GetTestStatSampler(); + sampler.SetTestStatistic(profll); + + paramPointNull = ROOT.RooArgSet(sbModel.GetParametersOfInterest()) + + htr = hypoCalc.GetHypoTest(); + htr.SetPValueIsRightTail(True); + htr.SetBackgroundAsAlt(True); + htr.Print(); + plot = ROOT.RooStats.HypoTestPlot(htr,100); + plot.SetLogYaxis(True); + plot.Draw(); + ROOT.gPad.SaveAs("plot.png"); + +import sys +if __name__ == '__main__': + StandardHypoTestDemo(sys.argv[1],int(sys.argv[2]) if len(sys.argv) > 2 else 2000) diff --git a/validation/xmlimport_input_bkg.json b/validation/xmlimport_input_bkg.json new file mode 100644 index 0000000000..e515969261 --- /dev/null +++ b/validation/xmlimport_input_bkg.json @@ -0,0 +1,146 @@ +{ + "channels": [ + { + "name": "channel1", + "samples": [ + { + "data": [ + 20.0, + 10.0 + ], + "modifiers": [ + { + "data": { + "hi": 1.05, + "lo": 0.95 + }, + "name": "syst1", + "type": "normsys" + }, + { + "data": null, + "name": "SigXsecOverSM", + "type": "normfactor" + } + ], + "name": "signal" + }, + { + "data": [ + 100.0, + 0.0 + ], + "modifiers": [ + { + "data": null, + "name": "lumi", + "type": "lumi" + }, + { + "data": [ + 5.000000074505806, + 0.0 + ], + "name": "staterror_channel1", + "type": "staterror" + }, + { + "data": { + "hi": 1.05, + "lo": 0.95 + }, + "name": "syst2", + "type": "normsys" + } + ], + "name": "background1" + }, + { + "data": [ + 0.0, + 100.0 + ], + "modifiers": [ + { + "data": null, + "name": "lumi", + "type": "lumi" + }, + { + "data": [ + 0.0, + 10.0 + ], + "name": "staterror_channel1", + "type": "staterror" + }, + { + "data": { + "hi": 1.05, + "lo": 0.95 + }, + "name": "syst3", + "type": "normsys" + } + ], + "name": "background2" + } + ] + } + ], + "measurements": [ + { + "config": { + "parameters": [ + { + "auxdata": [ + 1.0 + ], + "bounds": [ + [ + 0.5, + 1.5 + ] + ], + "fixed": true, + "inits": [ + 1.0 + ], + "name": "lumi", + "sigmas": [ + 0.1 + ] + }, + { + "bounds": [ + [ + 0.0, + 3.0 + ] + ], + "inits": [ + 1.0 + ], + "name": "SigXsecOverSM" + }, + { + "fixed": true, + "name": "syst1" + } + ], + "poi": "SigXsecOverSM" + }, + "name": "GaussExample" + } + ], + "observations": [ + { + "data": [ + 100.0, + 100.0 + ], + "name": "channel1" + } + ], + "version": "1.0.0" +} From 1af5697cc17508dc3b1fb347d03050b28ba37640 Mon Sep 17 00:00:00 2001 From: Lukas Heinrich Date: Sun, 8 Nov 2020 18:27:15 +0100 Subject: [PATCH 03/30] better fix --- src/pyhf/infer/__init__.py | 56 +++++------------ src/pyhf/infer/calculators.py | 48 ++++++++++++++- src/pyhf/infer/intervals.py | 1 + tests/test_infer.py | 18 +++--- validation/StandardHypoTestDemo.py | 99 ++++++++++++++++++------------ 5 files changed, 133 insertions(+), 89 deletions(-) diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py index 2195c0cf8c..8a2d159b79 100644 --- a/src/pyhf/infer/__init__.py +++ b/src/pyhf/infer/__init__.py @@ -141,57 +141,31 @@ def hypotest( teststat = calc.teststatistic(poi_test) sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test) - CLsb = sig_plus_bkg_distribution.pvalue(teststat) - CLb = b_only_distribution.pvalue(teststat) - CLs = CLsb / CLb - - tensorlib, _ = get_backend() - # Ensure that all CL values are 0-d tensors - CLsb, CLb, CLs = ( - tensorlib.astensor(CLsb), - tensorlib.astensor(CLb), - tensorlib.astensor(CLs), + pvalues = CLsb_obs, CLb_obs, CLs_obs = calc.pvalues( + teststat, sig_plus_bkg_distribution, b_only_distribution + ) + tb, _ = get_backend() + CLsb_obs, CLb_obs, CLs_obs = tuple(tb.astensor(x) for x in pvalues) + CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues( + sig_plus_bkg_distribution, b_only_distribution ) is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0' - _returns = [CLsb if is_q0 else CLs] + _returns = [CLsb_obs if is_q0 else CLs_obs] if return_tail_probs: if is_q0: - _returns.append([CLb]) + _returns.append([CLb_obs]) else: - _returns.append([CLsb, CLb]) + _returns.append([CLsb_obs, CLb_obs]) + + pvalues_exp = CLsb_exp if is_q0 else CLs_exp if return_expected_set: - CLs_exp = [] - for n_sigma in [2, 1, 0, -1, -2]: - - expected_bonly_teststat = b_only_distribution.expected_value(n_sigma) - - if is_q0: - # despite the name in this case this is the discovery p value - CLs = sig_plus_bkg_distribution.pvalue(expected_bonly_teststat) - else: - CLs = sig_plus_bkg_distribution.pvalue( - expected_bonly_teststat - ) / b_only_distribution.pvalue(expected_bonly_teststat) - CLs_exp.append(tensorlib.astensor(CLs)) if return_expected: - _returns.append(CLs_exp[2]) - _returns.append(CLs_exp) + _returns.append(tb.astensor(pvalues_exp[2])) + _returns.append(pvalues_exp) elif return_expected: - n_sigma = 0 - expected_bonly_teststat = b_only_distribution.expected_value(n_sigma) - - if is_q0: - # despite the name in this case this is the discovery p value - CLs = sig_plus_bkg_distribution.pvalue(expected_bonly_teststat) - else: - CLs = sig_plus_bkg_distribution.pvalue( - expected_bonly_teststat - ) / b_only_distribution.pvalue(expected_bonly_teststat) - - _returns.append(tensorlib.astensor(CLs)) - + _returns.append(tb.astensor(pvalues_exp[2])) # Enforce a consistent return type of the observed CLs return tuple(_returns) if len(_returns) > 1 else _returns[0] diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index aa98658305..4daa47bfe6 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -306,6 +306,23 @@ def _false_case(): ) return teststat + def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): + CLsb = sig_plus_bkg_distribution.pvalue(teststat) + CLb = b_only_distribution.pvalue(teststat) + CLs = CLsb / CLb + return CLsb, CLb, CLs + + def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): + tb, _ = get_backend() + return tb.astensor( + [ + self.pvalues(v, sig_plus_bkg_distribution, b_only_distribution) + for v in [ + b_only_distribution.expected_value(x) for x in [2, 1, 0, -1, -2] + ] + ] + ).T + class EmpiricalDistribution: """ @@ -378,7 +395,11 @@ def pvalue(self, value): """ tensorlib, _ = get_backend() return ( - tensorlib.sum(tensorlib.where(self.samples >= value, 1, 0)) + tensorlib.sum( + tensorlib.where( + self.samples >= value, tensorlib.astensor(1), tensorlib.astensor(0) + ) + ) / tensorlib.shape(self.samples)[0] ) @@ -574,6 +595,31 @@ def distributions(self, poi_test, track_progress=None): b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat)) return s_plus_b, b_only + def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): + CLsb = sig_plus_bkg_distribution.pvalue(teststat) + CLb = b_only_distribution.pvalue(teststat) + CLs = CLsb / CLb + return CLsb, CLb, CLs + + def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): + tb, _ = get_backend() + pvalues = tb.astensor( + [ + self.pvalues( + tb.astensor(x), sig_plus_bkg_distribution, b_only_distribution + ) + for x in b_only_distribution.samples + ] + ) + import numpy as np + + pvalues_exp = np.percentile( + tb.tolist(pvalues), + [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681], + axis=0, + ) + return tb.astensor(pvalues_exp).T + def teststatistic(self, poi_test): """ Compute the test statistic for the observed data under the studied model. diff --git a/src/pyhf/infer/intervals.py b/src/pyhf/infer/intervals.py index f146f48dba..edb1a0f4cd 100644 --- a/src/pyhf/infer/intervals.py +++ b/src/pyhf/infer/intervals.py @@ -55,6 +55,7 @@ def upperlimit(data, model, scan, level=0.05, return_results=False): ] obs = tb.astensor([[r[0]] for r in results]) exp = tb.astensor([[r[1][idx] for idx in range(5)] for r in results]) + result_arrary = tb.concatenate([obs, exp], axis=1).T # observed limit and the (0, +-1, +-2)sigma expected limits diff --git a/tests/test_infer.py b/tests/test_infer.py index 1070b0f64a..391854f675 100644 --- a/tests/test_infer.py +++ b/tests/test_infer.py @@ -351,16 +351,16 @@ def test_toy_calculator(tmpdir, hypotest_args): ) assert qtilde_mu_bkg.samples.tolist() == pytest.approx( [ - 2.2664625238298868, - 1.081660885985002, - 2.757022090389057, - 1.3835689306226868, - 0.4707466955809423, + 2.2664625749100082, + 1.081660887453154, + 2.7570218408936853, + 1.3835691388297846, + 0.4707467005909507, 0.0, - 3.7166483694865065, - 3.8021896741039427, - 5.114135403520493, - 1.35111537136072, + 3.7166483705294127, + 3.8021896732709592, + 5.114135391143066, + 1.3511153731000718, ], 1e-07, ) diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py index e5551cde6c..58707ec95c 100644 --- a/validation/StandardHypoTestDemo.py +++ b/validation/StandardHypoTestDemo.py @@ -1,55 +1,78 @@ import ROOT +import numpy as np + def StandardHypoTestDemo( infile, ntoys, - workspaceName = "combined", - modelSBName = "ModelConfig", - dataName = "obsData", + workspaceName="combined", + modelSBName="ModelConfig", + dataName="obsData", ): - file = ROOT.TFile.Open(infile); - w = file.Get(workspaceName); - sbModel = w.obj(modelSBName); - data = w.data(dataName); - - bModel = sbModel.Clone(); - bModel.SetName(modelSBName+"B_only"); - var = (bModel.GetParametersOfInterest().first()); - oldval = var.getVal(); - var.setVal(0); - bModel.SetSnapshot( ROOT.RooArgSet(var) ); - var.setVal(oldval); - - var = sbModel.GetParametersOfInterest().first(); - oldval = var.getVal(); - sbModel.SetSnapshot( ROOT.RooArgSet(var) ); - - profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf()); - profll.SetOneSidedDiscovery(False); - profll.SetOneSided(True); - hypoCalc = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel); - # profll.SetPrintLevel(2) + file = ROOT.TFile.Open(infile) + w = file.Get(workspaceName) + sbModel = w.obj(modelSBName) + data = w.data(dataName) + + bModel = sbModel.Clone() + bModel.SetName(modelSBName + "B_only") + var = bModel.GetParametersOfInterest().first() + oldval = var.getVal() + var.setVal(0) + bModel.SetSnapshot(ROOT.RooArgSet(var)) + var.setVal(oldval) + var = sbModel.GetParametersOfInterest().first() + oldval = var.getVal() + sbModel.SetSnapshot(ROOT.RooArgSet(var)) - print('by hand',profll.Evaluate(data,ROOT.RooArgSet(var) )) + profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf()) + profll.SetOneSidedDiscovery(False) + profll.SetOneSided(True) + hypoCalc = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel) + # profll.SetPrintLevel(2) + + print('by hand', profll.Evaluate(data, ROOT.RooArgSet(var))) hypoCalc.SetToys(ntoys, ntoys) - sampler = hypoCalc.GetTestStatSampler(); - sampler.SetTestStatistic(profll); + sampler = hypoCalc.GetTestStatSampler() + sampler.SetTestStatistic(profll) paramPointNull = ROOT.RooArgSet(sbModel.GetParametersOfInterest()) - - htr = hypoCalc.GetHypoTest(); - htr.SetPValueIsRightTail(True); - htr.SetBackgroundAsAlt(True); - htr.Print(); - plot = ROOT.RooStats.HypoTestPlot(htr,100); - plot.SetLogYaxis(True); - plot.Draw(); - ROOT.gPad.SaveAs("plot.png"); + + htr = hypoCalc.GetHypoTest() + htr.SetPValueIsRightTail(True) + htr.SetBackgroundAsAlt(True) + + cls_obs = htr.CLs() + ds = htr.GetNullDistribution() + db = htr.GetAltDistribution() + + r = ROOT.RooStats.HypoTestResult() + r.SetPValueIsRightTail(True) + r.SetNullDistribution(ds) + r.SetAltDistribution(db) + + values = [ + 1 / (r.SetTestStatisticData(v), r.CLs())[-1] + for v in ds.GetSamplingDistribution() + ] + print(values) + values = np.percentile( + values, [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681] + ) + print(values) + + htr.Print() + plot = ROOT.RooStats.HypoTestPlot(htr, 100) + plot.SetLogYaxis(True) + plot.Draw() + ROOT.gPad.SaveAs("plot.png") + import sys + if __name__ == '__main__': - StandardHypoTestDemo(sys.argv[1],int(sys.argv[2]) if len(sys.argv) > 2 else 2000) + StandardHypoTestDemo(sys.argv[1], int(sys.argv[2]) if len(sys.argv) > 2 else 2000) From ba664ad2cbbec80a5f427f92f8a32dea504c7953 Mon Sep 17 00:00:00 2001 From: Lukas Heinrich Date: Sun, 8 Nov 2020 19:01:20 +0100 Subject: [PATCH 04/30] better fix --- validation/StandardHypoTestDemo.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py index 58707ec95c..ee617e5a86 100644 --- a/validation/StandardHypoTestDemo.py +++ b/validation/StandardHypoTestDemo.py @@ -40,13 +40,10 @@ def StandardHypoTestDemo( sampler = hypoCalc.GetTestStatSampler() sampler.SetTestStatistic(profll) - paramPointNull = ROOT.RooArgSet(sbModel.GetParametersOfInterest()) - htr = hypoCalc.GetHypoTest() htr.SetPValueIsRightTail(True) htr.SetBackgroundAsAlt(True) - cls_obs = htr.CLs() ds = htr.GetNullDistribution() db = htr.GetAltDistribution() From 9edfdea3b5aba7a195cc2d3a734b952d2a33ba72 Mon Sep 17 00:00:00 2001 From: Lukas Heinrich Date: Sun, 8 Nov 2020 19:19:16 +0100 Subject: [PATCH 05/30] better fix --- src/pyhf/infer/calculators.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 4daa47bfe6..49d516bde2 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -307,12 +307,14 @@ def _false_case(): return teststat def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): + '''calculate pvalues.''' CLsb = sig_plus_bkg_distribution.pvalue(teststat) CLb = b_only_distribution.pvalue(teststat) CLs = CLsb / CLb return CLsb, CLb, CLs def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): + '''calculate expected pvalues.''' tb, _ = get_backend() return tb.astensor( [ @@ -596,12 +598,14 @@ def distributions(self, poi_test, track_progress=None): return s_plus_b, b_only def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): + '''calculate pvalues.''' CLsb = sig_plus_bkg_distribution.pvalue(teststat) CLb = b_only_distribution.pvalue(teststat) CLs = CLsb / CLb return CLsb, CLb, CLs def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): + '''calculate expected pvalues.''' tb, _ = get_backend() pvalues = tb.astensor( [ From 4f4a2585eb9396830f30394a896bd6726d18cdd6 Mon Sep 17 00:00:00 2001 From: Lukas Heinrich Date: Sun, 8 Nov 2020 20:48:49 +0100 Subject: [PATCH 06/30] better fix --- src/pyhf/infer/__init__.py | 4 +--- validation/StandardHypoTestDemo.py | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py index 8a2d159b79..b2e457e29b 100644 --- a/src/pyhf/infer/__init__.py +++ b/src/pyhf/infer/__init__.py @@ -141,9 +141,7 @@ def hypotest( teststat = calc.teststatistic(poi_test) sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test) - pvalues = CLsb_obs, CLb_obs, CLs_obs = calc.pvalues( - teststat, sig_plus_bkg_distribution, b_only_distribution - ) + pvalues = calc.pvalues(teststat, sig_plus_bkg_distribution, b_only_distribution) tb, _ = get_backend() CLsb_obs, CLb_obs, CLs_obs = tuple(tb.astensor(x) for x in pvalues) CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues( diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py index ee617e5a86..372cf96e34 100644 --- a/validation/StandardHypoTestDemo.py +++ b/validation/StandardHypoTestDemo.py @@ -24,7 +24,6 @@ def StandardHypoTestDemo( var.setVal(oldval) var = sbModel.GetParametersOfInterest().first() - oldval = var.getVal() sbModel.SetSnapshot(ROOT.RooArgSet(var)) profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf()) From 126da495fce154f5567de080b8d716b937b59f22 Mon Sep 17 00:00:00 2001 From: Lukas Heinrich Date: Mon, 9 Nov 2020 13:36:41 +0100 Subject: [PATCH 07/30] add run_toys.py --- validation/run_toys.py | 61 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 validation/run_toys.py diff --git a/validation/run_toys.py b/validation/run_toys.py new file mode 100644 index 0000000000..8eb152f752 --- /dev/null +++ b/validation/run_toys.py @@ -0,0 +1,61 @@ +import ROOT +import sys +import json + +infile = sys.argv[1] +ntoys = int(sys.argv[2]) if len(sys.argv) > 2 else 2000 + +infile = ROOT.TFile.Open(infile) +workspace = infile.Get("combined") +data = workspace.data("obsData") + +sbModel = workspace.obj("ModelConfig") +poi = sbModel.GetParametersOfInterest().first() + +sbModel.SetSnapshot(ROOT.RooArgSet(poi)) + +bModel = sbModel.Clone() +bModel.SetName("bonly") +poi.setVal(0) +bModel.SetSnapshot(ROOT.RooArgSet(poi)) + +ac = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel) +ac.SetToys(ntoys, ntoys) + +profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf()) +profll.SetOneSidedDiscovery(False) +profll.SetOneSided(True) +ac.GetTestStatSampler().SetTestStatistic(profll) + +calc = ROOT.RooStats.HypoTestInverter(ac) +calc.SetConfidenceLevel(0.95) +calc.UseCLs(True) + +npoints = 2 + +calc.RunFixedScan(npoints, 1.0, 1.2) + +result = calc.GetInterval() + +plot = ROOT.RooStats.HypoTestInverterPlot("plot", "plot", result) + + +data = [] +for i in range(npoints): + d = { + 'test_b': list(result.GetAltTestStatDist(i).GetSamplingDistribution()), + 'test_s': list(result.GetNullTestStatDist(i).GetSamplingDistribution()), + 'pvals': list(result.GetExpectedPValueDist(i).GetSamplingDistribution()) + } + data.append(d) + +json.dump(data,open('scan.json','w')) + +c = ROOT.TCanvas() +c.SetLogy(False) +plot.Draw("OBS EXP CLb 2CL") +c.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0,0.2) +# c.GetYaxis().SetRangeUser(0,0.2) +c.Draw() +c.SaveAs('scan.pdf') + From 02d0a74b9523f66f0d68af5f2a38c4d96f8fc768 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Nov 2020 12:37:17 +0000 Subject: [PATCH 08/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- validation/run_toys.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/validation/run_toys.py b/validation/run_toys.py index 8eb152f752..39d4f269ca 100644 --- a/validation/run_toys.py +++ b/validation/run_toys.py @@ -35,7 +35,7 @@ calc.RunFixedScan(npoints, 1.0, 1.2) -result = calc.GetInterval() +result = calc.GetInterval() plot = ROOT.RooStats.HypoTestInverterPlot("plot", "plot", result) @@ -45,17 +45,16 @@ d = { 'test_b': list(result.GetAltTestStatDist(i).GetSamplingDistribution()), 'test_s': list(result.GetNullTestStatDist(i).GetSamplingDistribution()), - 'pvals': list(result.GetExpectedPValueDist(i).GetSamplingDistribution()) + 'pvals': list(result.GetExpectedPValueDist(i).GetSamplingDistribution()), } data.append(d) -json.dump(data,open('scan.json','w')) +json.dump(data, open('scan.json', 'w')) c = ROOT.TCanvas() c.SetLogy(False) plot.Draw("OBS EXP CLb 2CL") -c.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0,0.2) +c.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0, 0.2) # c.GetYaxis().SetRangeUser(0,0.2) c.Draw() c.SaveAs('scan.pdf') - From 5f84190b392a2502f647bf0d735ca433c61d3de5 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Mon, 9 Nov 2020 22:16:28 -0600 Subject: [PATCH 09/30] Add docstring for asymptotic pvalues --- src/pyhf/infer/calculators.py | 40 ++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 49d516bde2..ad6d429402 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -211,8 +211,8 @@ def distributions(self, poi_test): >>> mu_test = 1.0 >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(data, model, test_stat="qtilde") >>> _ = asymptotic_calculator.teststatistic(mu_test) - >>> qmu_sig, qmu_bkg = asymptotic_calculator.distributions(mu_test) - >>> qmu_sig.pvalue(mu_test), qmu_bkg.pvalue(mu_test) + >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test) + >>> sig_plus_bkg_dist.pvalue(mu_test), bkg_dist.pvalue(mu_test) (0.002192624107163899, 0.15865525393145707) Args: @@ -307,7 +307,41 @@ def _false_case(): return teststat def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): - '''calculate pvalues.''' + r""" + Calculate the :math:`p`-values for the observed test statistic under the + signal + background and background-only model hypotheses. + + Example: + + >>> import pyhf + >>> pyhf.set_backend("numpy") + >>> model = pyhf.simplemodels.hepdata_like( + ... signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0] + ... ) + >>> observations = [51, 48] + >>> data = observations + model.config.auxdata + >>> mu_test = 1.0 + >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator( + ... data, model, qtilde=True + ... ) + >>> q_tilde = asymptotic_calculator.teststatistic(mu_test) + >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test) + >>> CLsb, CLb, CLs = asymptotic_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist) + >>> CLsb, CLb, CLs + (0.023325019427864607, 0.4441593996111411, 0.05251497423736956) + + Args: + teststat (:obj:`tensor`): The test statistic. + sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + The distribution for the signal + background hypothesis. + b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + The distribution for the background-only hypothesis. + + Returns: + Tuple (:obj:`float`): The :math:`p`-values for the test statistic + corresponding to the :math:`\mathrm{CL}_{s+b}`, + :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. + """ CLsb = sig_plus_bkg_distribution.pvalue(teststat) CLb = b_only_distribution.pvalue(teststat) CLs = CLsb / CLb From 9c65eef75164a85a56a6a4d275608bb1d6fcd21a Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Mon, 9 Nov 2020 22:34:10 -0600 Subject: [PATCH 10/30] Add docstrings for expected_pvalues --- src/pyhf/infer/calculators.py | 37 ++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index ad6d429402..c7a8f772dd 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -348,7 +348,42 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): return CLsb, CLb, CLs def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): - '''calculate expected pvalues.''' + r""" + Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the + median significance of variations of the signal strength from the + background only hypothesis :math:`\left(\mu=0\right)` at + :math:`(-2,-1,0,1,2)\sigma`. + + Example: + + >>> import pyhf + >>> pyhf.set_backend("numpy") + >>> model = pyhf.simplemodels.hepdata_like( + ... signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0] + ... ) + >>> observations = [51, 48] + >>> data = observations + model.config.auxdata + >>> mu_test = 1.0 + >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator( + ... data, model, qtilde=True + ... ) + >>> _ = asymptotic_calculator.teststatistic(mu_test) + >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test) + >>> CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist) + >>> CLs_exp_band + [0.0026062609501074576, 0.01382005356161206, 0.06445320535890459, 0.23525643861460702, 0.573036205919389] + + Args: + sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + The distribution for the signal + background hypothesis. + b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + The distribution for the background-only hypothesis. + + Returns: + Tuple (:obj:`float`): The :math:`p`-values for the test statistic + corresponding to the :math:`\mathrm{CL}_{s+b}`, + :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. + """ tb, _ = get_backend() return tb.astensor( [ From b8a008230533247719e831d4b2626fb4434a773f Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Mon, 9 Nov 2020 22:39:06 -0600 Subject: [PATCH 11/30] more explicit var names --- src/pyhf/infer/calculators.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index c7a8f772dd..613772aad1 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -384,12 +384,14 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): corresponding to the :math:`\mathrm{CL}_{s+b}`, :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. """ + # Calling pvalues is easier then repeating the CLs calculation here tb, _ = get_backend() return tb.astensor( [ - self.pvalues(v, sig_plus_bkg_distribution, b_only_distribution) - for v in [ - b_only_distribution.expected_value(x) for x in [2, 1, 0, -1, -2] + self.pvalues(test_stat, sig_plus_bkg_distribution, b_only_distribution) + for test_stat in [ + b_only_distribution.expected_value(n_sigma) + for n_sigma in [2, 1, 0, -1, -2] ] ] ).T From 904f6522cf9a95f5b72d2f494adab3b4b2d69d1a Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Mon, 9 Nov 2020 23:09:09 -0600 Subject: [PATCH 12/30] Add docstrings for emperical distributions methods --- src/pyhf/infer/calculators.py | 80 +++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 4 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 613772aad1..89c6c84f58 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -602,8 +602,8 @@ def distributions(self, poi_test, track_progress=None): >>> toy_calculator = pyhf.infer.calculators.ToyCalculator( ... data, model, ntoys=100, track_progress=False ... ) - >>> qmu_sig, qmu_bkg = toy_calculator.distributions(mu_test) - >>> qmu_sig.pvalue(mu_test), qmu_bkg.pvalue(mu_test) + >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test) + >>> sig_plus_bkg_dist.pvalue(mu_test), bkg_dist.pvalue(mu_test) (0.14, 0.76) Args: @@ -669,14 +669,86 @@ def distributions(self, poi_test, track_progress=None): return s_plus_b, b_only def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): - '''calculate pvalues.''' + r""" + Calculate the :math:`p`-values for the observed test statistic under the + signal + background and background-only model hypotheses. + + Example: + + >>> import pyhf + >>> import numpy.random as random + >>> random.seed(0) + >>> pyhf.set_backend("numpy") + >>> model = pyhf.simplemodels.hepdata_like( + ... signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0] + ... ) + >>> observations = [51, 48] + >>> data = observations + model.config.auxdata + >>> mu_test = 1.0 + >>> toy_calculator = pyhf.infer.calculators.ToyCalculator( + ... data, model, ntoys=100, track_progress=False + ... ) + >>> q_tilde = toy_calculator.teststatistic(mu_test) + >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test) + >>> CLsb, CLb, CLs = toy_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist) + >>> CLsb, CLb, CLs + (0.01, 0.41, 0.024390243902439025) + + Args: + teststat (:obj:`tensor`): The test statistic. + sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + The distribution for the signal + background hypothesis. + b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + The distribution for the background-only hypothesis. + + Returns: + Tuple (:obj:`float`): The :math:`p`-values for the test statistic + corresponding to the :math:`\mathrm{CL}_{s+b}`, + :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. + """ CLsb = sig_plus_bkg_distribution.pvalue(teststat) CLb = b_only_distribution.pvalue(teststat) CLs = CLsb / CLb return CLsb, CLb, CLs def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): - '''calculate expected pvalues.''' + r""" + Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the + median significance of variations of the signal strength from the + background only hypothesis :math:`\left(\mu=0\right)` at + :math:`(-2,-1,0,1,2)\sigma`. + + Example: + + >>> import pyhf + >>> import numpy.random as random + >>> random.seed(0) + >>> pyhf.set_backend("numpy") + >>> model = pyhf.simplemodels.hepdata_like( + ... signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0] + ... ) + >>> observations = [51, 48] + >>> data = observations + model.config.auxdata + >>> mu_test = 1.0 + >>> toy_calculator = pyhf.infer.calculators.ToyCalculator( + ... data, model, ntoys=100, track_progress=False + ... ) + >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test) + >>> CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist) + >>> CLs_exp_band + [0.0, 0.0, 0.06186224489795918, 0.2845003327965815, 1.0] + + Args: + sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + The distribution for the signal + background hypothesis. + b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + The distribution for the background-only hypothesis. + + Returns: + Tuple (:obj:`float`): The :math:`p`-values for the test statistic + corresponding to the :math:`\mathrm{CL}_{s+b}`, + :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. + """ tb, _ = get_backend() pvalues = tb.astensor( [ From a82dbb3078d767cbd89581af0ced23508d83f23a Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Mon, 9 Nov 2020 23:10:12 -0600 Subject: [PATCH 13/30] Note Issues for percentile in tensorlib --- src/pyhf/infer/calculators.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 89c6c84f58..8c705f7fe4 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -753,11 +753,15 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): pvalues = tb.astensor( [ self.pvalues( - tb.astensor(x), sig_plus_bkg_distribution, b_only_distribution + tb.astensor(test_stat), + sig_plus_bkg_distribution, + b_only_distribution, ) - for x in b_only_distribution.samples + for test_stat in b_only_distribution.samples ] ) + # TODO: Add percentile to tensorlib + # c.f. Issue #815, PR #817 import numpy as np pvalues_exp = np.percentile( From 99edeae539f17e1224c0f0d2829d4025c302ca57 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Mon, 9 Nov 2020 23:10:36 -0600 Subject: [PATCH 14/30] Make floats instead of tesnors for consistency Check on this --- docs/api.rst | 1 + src/pyhf/infer/calculators.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index a7fa872067..e47f1256c6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -121,6 +121,7 @@ Inference .. autosummary:: :toctree: _generated/ + :template: modifierclass.rst test_statistics.q0 test_statistics.qmu diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 8c705f7fe4..f6a013b969 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -768,8 +768,8 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): tb.tolist(pvalues), [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681], axis=0, - ) - return tb.astensor(pvalues_exp).T + ).T.tolist() + return pvalues_exp def teststatistic(self, poi_test): """ From 5ef53527ba3a3bf5bdde05b6ff03af24ec73973b Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Mon, 9 Nov 2020 23:23:16 -0600 Subject: [PATCH 15/30] Simplify and use more explicit naming Change name from CLs_exp to CLs_exp_band to make clear not a singular value --- src/pyhf/infer/__init__.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py index b2e457e29b..36881b8c0d 100644 --- a/src/pyhf/infer/__init__.py +++ b/src/pyhf/infer/__init__.py @@ -141,9 +141,13 @@ def hypotest( teststat = calc.teststatistic(poi_test) sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test) - pvalues = calc.pvalues(teststat, sig_plus_bkg_distribution, b_only_distribution) tb, _ = get_backend() - CLsb_obs, CLb_obs, CLs_obs = tuple(tb.astensor(x) for x in pvalues) + CLsb_obs, CLb_obs, CLs_obs = tuple( + tb.astensor(pvalue) + for pvalue in calc.pvalues( + teststat, sig_plus_bkg_distribution, b_only_distribution + ) + ) CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues( sig_plus_bkg_distribution, b_only_distribution ) @@ -157,13 +161,13 @@ def hypotest( else: _returns.append([CLsb_obs, CLb_obs]) - pvalues_exp = CLsb_exp if is_q0 else CLs_exp + pvalues_exp_band = CLsb_exp if is_q0 else CLs_exp if return_expected_set: if return_expected: - _returns.append(tb.astensor(pvalues_exp[2])) - _returns.append(pvalues_exp) + _returns.append(tb.astensor(pvalues_exp_band[2])) + _returns.append(pvalues_exp_band) elif return_expected: - _returns.append(tb.astensor(pvalues_exp[2])) + _returns.append(tb.astensor(pvalues_exp_band[2])) # Enforce a consistent return type of the observed CLs return tuple(_returns) if len(_returns) > 1 else _returns[0] From 8d4e8163ee514b60604c1073abcacf3e6040d741 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Mon, 9 Nov 2020 23:32:39 -0600 Subject: [PATCH 16/30] Correct distribution type --- src/pyhf/infer/calculators.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index f6a013b969..f4fc723d97 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -696,9 +696,9 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): Args: teststat (:obj:`tensor`): The test statistic. - sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the signal + background hypothesis. - b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + b_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the background-only hypothesis. Returns: @@ -739,9 +739,9 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): [0.0, 0.0, 0.06186224489795918, 0.2845003327965815, 1.0] Args: - sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the signal + background hypothesis. - b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + b_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the background-only hypothesis. Returns: From affac46259b03302d5982e98e9d1436635f0bbf7 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Tue, 10 Nov 2020 01:20:48 -0600 Subject: [PATCH 17/30] Make clear purpose of percentiles --- src/pyhf/infer/calculators.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index f4fc723d97..847a48003a 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -764,12 +764,13 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): # c.f. Issue #815, PR #817 import numpy as np - pvalues_exp = np.percentile( + _percentiles = [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681] + pvalues_exp_band = np.percentile( tb.tolist(pvalues), - [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681], + _percentiles, axis=0, ).T.tolist() - return pvalues_exp + return pvalues_exp_band def teststatistic(self, poi_test): """ From 03c444ab4597f5a739e53ecd29deb05e10e2610c Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Tue, 10 Nov 2020 02:33:13 -0600 Subject: [PATCH 18/30] Add pyhf version to hypo test demo --- validation/StandardHypoTestDemo.py | 51 ++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py index 372cf96e34..851fd6eec0 100644 --- a/validation/StandardHypoTestDemo.py +++ b/validation/StandardHypoTestDemo.py @@ -1,5 +1,9 @@ -import ROOT +import sys import numpy as np +import json +import matplotlib.pyplot as plt +import ROOT +import pyhf def StandardHypoTestDemo( @@ -68,7 +72,50 @@ def StandardHypoTestDemo( ROOT.gPad.SaveAs("plot.png") -import sys +def pyhf_version(ntoys=5000, seed=0): + np.random.seed(seed) + with open("validation/xmlimport_input_bkg.json") as ws_json: + workspace = pyhf.Workspace(json.load(ws_json)) + + model = workspace.model() + data = workspace.data(model) + toy_calculator = pyhf.infer.utils.create_calculator( + "toybased", + data, + model, + ntoys=ntoys, + ) + test_mu = 1.0 + sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(test_mu) + q_tilde = toy_calculator.teststatistic(test_mu) + + bins = np.linspace(0, 8, 100) + + fig, ax = plt.subplots() + ax.hist( + bkg_dist.samples / 2.0, + alpha=0.2, + bins=bins, + density=True, + label=r"$f(\tilde{q}|0)$ Background", + ) + ax.hist( + sig_plus_bkg_dist.samples / 2.0, + alpha=0.2, + bins=bins, + density=True, + label=r"$f(\tilde{q}|1)$ Signal", + ) + ax.axvline(q_tilde / 2, color="black", label="Observed test statistic") + ax.semilogy() + + ax.set_xlabel(r"$\tilde{q}$") + ax.set_ylabel(r"$f(\tilde{q}|\mu')$") + ax.legend(loc="best") + + fig.savefig("pyhf_version.png") + if __name__ == '__main__': StandardHypoTestDemo(sys.argv[1], int(sys.argv[2]) if len(sys.argv) > 2 else 2000) + # pyhf_version() From 1cbda0027b53dbf9b1597b2de6e5d26798858e58 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Tue, 10 Nov 2020 11:41:39 -0600 Subject: [PATCH 19/30] add notes on test stat definition --- validation/StandardHypoTestDemo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py index 851fd6eec0..ce2b68aae6 100644 --- a/validation/StandardHypoTestDemo.py +++ b/validation/StandardHypoTestDemo.py @@ -92,6 +92,7 @@ def pyhf_version(ntoys=5000, seed=0): bins = np.linspace(0, 8, 100) fig, ax = plt.subplots() + # Compare to ROOT's choice of test stat being NLL instead of 2*NLL ax.hist( bkg_dist.samples / 2.0, alpha=0.2, @@ -117,5 +118,7 @@ def pyhf_version(ntoys=5000, seed=0): if __name__ == '__main__': - StandardHypoTestDemo(sys.argv[1], int(sys.argv[2]) if len(sys.argv) > 2 else 2000) + StandardHypoTestDemo( + infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 + ) # pyhf_version() From 3b93c1205d0fe7d253e7bdc4eeb099d4c4a6acf0 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Tue, 10 Nov 2020 11:55:22 -0600 Subject: [PATCH 20/30] Restructure to allow multiple runs --- validation/StandardHypoTestDemo.py | 2 +- validation/run_toys.py | 88 ++++++++++++++++-------------- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py index ce2b68aae6..89a4fb8cac 100644 --- a/validation/StandardHypoTestDemo.py +++ b/validation/StandardHypoTestDemo.py @@ -117,7 +117,7 @@ def pyhf_version(ntoys=5000, seed=0): fig.savefig("pyhf_version.png") -if __name__ == '__main__': +if __name__ == "__main__": StandardHypoTestDemo( infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 ) diff --git a/validation/run_toys.py b/validation/run_toys.py index 39d4f269ca..b5f7f2be8a 100644 --- a/validation/run_toys.py +++ b/validation/run_toys.py @@ -2,59 +2,65 @@ import sys import json -infile = sys.argv[1] -ntoys = int(sys.argv[2]) if len(sys.argv) > 2 else 2000 -infile = ROOT.TFile.Open(infile) -workspace = infile.Get("combined") -data = workspace.data("obsData") +def run_toys_ROOT(infile, ntoys): + infile = ROOT.TFile.Open(infile) + workspace = infile.Get("combined") + data = workspace.data("obsData") -sbModel = workspace.obj("ModelConfig") -poi = sbModel.GetParametersOfInterest().first() + sbModel = workspace.obj("ModelConfig") + poi = sbModel.GetParametersOfInterest().first() -sbModel.SetSnapshot(ROOT.RooArgSet(poi)) + sbModel.SetSnapshot(ROOT.RooArgSet(poi)) -bModel = sbModel.Clone() -bModel.SetName("bonly") -poi.setVal(0) -bModel.SetSnapshot(ROOT.RooArgSet(poi)) + bModel = sbModel.Clone() + bModel.SetName("bonly") + poi.setVal(0) + bModel.SetSnapshot(ROOT.RooArgSet(poi)) -ac = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel) -ac.SetToys(ntoys, ntoys) + ac = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel) + ac.SetToys(ntoys, ntoys) -profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf()) -profll.SetOneSidedDiscovery(False) -profll.SetOneSided(True) -ac.GetTestStatSampler().SetTestStatistic(profll) + profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf()) + profll.SetOneSidedDiscovery(False) + profll.SetOneSided(True) + ac.GetTestStatSampler().SetTestStatistic(profll) -calc = ROOT.RooStats.HypoTestInverter(ac) -calc.SetConfidenceLevel(0.95) -calc.UseCLs(True) + calc = ROOT.RooStats.HypoTestInverter(ac) + calc.SetConfidenceLevel(0.95) + calc.UseCLs(True) -npoints = 2 + npoints = 2 -calc.RunFixedScan(npoints, 1.0, 1.2) + calc.RunFixedScan(npoints, 1.0, 1.2) -result = calc.GetInterval() + result = calc.GetInterval() -plot = ROOT.RooStats.HypoTestInverterPlot("plot", "plot", result) + plot = ROOT.RooStats.HypoTestInverterPlot("plot", "plot", result) + data = [] + for i in range(npoints): + d = { + "test_b": list(result.GetAltTestStatDist(i).GetSamplingDistribution()), + "test_s": list(result.GetNullTestStatDist(i).GetSamplingDistribution()), + "pvals": list(result.GetExpectedPValueDist(i).GetSamplingDistribution()), + } + data.append(d) -data = [] -for i in range(npoints): - d = { - 'test_b': list(result.GetAltTestStatDist(i).GetSamplingDistribution()), - 'test_s': list(result.GetNullTestStatDist(i).GetSamplingDistribution()), - 'pvals': list(result.GetExpectedPValueDist(i).GetSamplingDistribution()), - } - data.append(d) + json.dump(data, open("scan.json", "w")) -json.dump(data, open('scan.json', 'w')) + canvas = ROOT.TCanvas() + canvas.SetLogy(False) + plot.Draw("OBS EXP CLb 2CL") + canvas.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0, 0.2) + canvas.Draw() + extensions = ["pdf", "png"] + for ext in extensions: + canvas.SaveAs(f"poi_scan_ROOT.{ext}") -c = ROOT.TCanvas() -c.SetLogy(False) -plot.Draw("OBS EXP CLb 2CL") -c.GetListOfPrimitives().At(0).GetYaxis().SetRangeUser(0, 0.2) -# c.GetYaxis().SetRangeUser(0,0.2) -c.Draw() -c.SaveAs('scan.pdf') + +if __name__ == "__main__": + run_toys_ROOT( + infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 + ) + # run_toys_pyhf() From 4933911dfbaa2e96569b8a7d0a24b4eb0b9c8037 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Thu, 12 Nov 2020 03:18:24 -0600 Subject: [PATCH 21/30] Add temp for debugging --- validation/run_toys.py | 51 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/validation/run_toys.py b/validation/run_toys.py index b5f7f2be8a..965a709772 100644 --- a/validation/run_toys.py +++ b/validation/run_toys.py @@ -1,6 +1,10 @@ -import ROOT import sys import json +import numpy as np +import matplotlib.pyplot as plt +import pyhf +import pyhf.contrib.viz.brazil as brazil +import ROOT def run_toys_ROOT(infile, ntoys): @@ -59,8 +63,45 @@ def run_toys_ROOT(infile, ntoys): canvas.SaveAs(f"poi_scan_ROOT.{ext}") +def run_toys_pyhf(ntoys=2_000, seed=0): + np.random.seed(seed) + # with open("validation/xmlimport_input_bkg.json") as ws_json: + with open("debug/issue_workpace/issue_ws.json") as ws_json: + workspace = pyhf.Workspace(json.load(ws_json)) + + model = workspace.model() + data = workspace.data(model) + + n_points = 4 + test_mus = np.linspace(1.0, 1.2, n_points) + fit_results = [ + pyhf.infer.hypotest( + mu, data, model, return_expected_set=True, calctype="toybased", ntoys=ntoys + ) + for mu in test_mus + ] + + fig, ax = plt.subplots() + brazil.plot_results(ax, test_mus, fit_results) + ax.set_xlabel(r"$\mu$") + ax.set_ylabel(r"$\mathrm{CL}_{s}$") + _buffer = 0.02 + ax.set_xlim(1.0 - _buffer, 1.2 + _buffer) + ax.set_ylim(0.0, 0.2) + + extensions = ["pdf", "png"] + for ext in extensions: + fig.savefig(f"poi_scan_pyhf.{ext}") + + ax.set_ylim(1e-3, 0.2) + ax.semilogy() + + for ext in extensions: + fig.savefig(f"poi_scan_logy_pyhf.{ext}") + + if __name__ == "__main__": - run_toys_ROOT( - infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 - ) - # run_toys_pyhf() + # run_toys_ROOT( + # infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 + # ) + run_toys_pyhf(ntoys=20_000) From 4f0d43090e196cbb38799a472540aa16f35fe52c Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 23 Dec 2020 18:45:22 -0600 Subject: [PATCH 22/30] Mark off non-asymptotic calculators in README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 19fbef79aa..a9ba3f56f5 100644 --- a/README.rst +++ b/README.rst @@ -116,6 +116,7 @@ Implemented variations: - ☑ ShapeFactor - ☑ StatError - ☑ Lumi Uncertainty + - ☑ Non-asymptotic calculators Computational Backends: - ☑ NumPy @@ -134,7 +135,6 @@ Todo ---- - ☐ StatConfig -- ☐ Non-asymptotic calculators results obtained from this package are validated against output computed from HistFactory workspaces From e66319d493cc79e57b3a1476f83c9ae239e2e965 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Mon, 18 Jan 2021 19:57:08 -0500 Subject: [PATCH 23/30] fix flake --- validation/run_toys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validation/run_toys.py b/validation/run_toys.py index 965a709772..c8f6b6bb7f 100644 --- a/validation/run_toys.py +++ b/validation/run_toys.py @@ -1,4 +1,3 @@ -import sys import json import numpy as np import matplotlib.pyplot as plt @@ -101,6 +100,7 @@ def run_toys_pyhf(ntoys=2_000, seed=0): if __name__ == "__main__": + # import sys # run_toys_ROOT( # infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 # ) From bd3d3bf1aa0e6be9e0f9684c250f2f78cda4b3ab Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Mon, 18 Jan 2021 20:21:31 -0500 Subject: [PATCH 24/30] fix validation --- tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_validation.py b/tests/test_validation.py index 481f349680..5c8363ea42 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -124,7 +124,7 @@ def expected_result_1bin_shapesys_q0(): @pytest.fixture(scope='module') def expected_result_1bin_shapesys_q0_toys(): expected_result = { - "exp": [0.0, 0.0005, 0.0145, 0.1205, 0.403], + "exp": [0.0, 0.0005, 0.0145, 0.1205, 0.402761], "obs": 0.005, } return expected_result From 262d1325fe6d66166b913b87bb526b9a95887716 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Mon, 18 Jan 2021 20:29:57 -0500 Subject: [PATCH 25/30] fix up the transpose --- src/pyhf/infer/__init__.py | 4 +++- src/pyhf/infer/calculators.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py index 36881b8c0d..a45d782cc1 100644 --- a/src/pyhf/infer/__init__.py +++ b/src/pyhf/infer/__init__.py @@ -161,7 +161,9 @@ def hypotest( else: _returns.append([CLsb_obs, CLb_obs]) - pvalues_exp_band = CLsb_exp if is_q0 else CLs_exp + pvalues_exp_band = [ + tb.astensor(pvalue) for pvalue in (CLsb_exp if is_q0 else CLs_exp) + ] if return_expected_set: if return_expected: _returns.append(tb.astensor(pvalues_exp_band[2])) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 847a48003a..1292a834b0 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -386,15 +386,15 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): """ # Calling pvalues is easier then repeating the CLs calculation here tb, _ = get_backend() - return tb.astensor( - [ + return zip( + *[ self.pvalues(test_stat, sig_plus_bkg_distribution, b_only_distribution) for test_stat in [ b_only_distribution.expected_value(n_sigma) for n_sigma in [2, 1, 0, -1, -2] ] ] - ).T + ) class EmpiricalDistribution: From b2ce8a39fb704b966c5c2e20531583694ae8e677 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Mon, 18 Jan 2021 20:47:15 -0500 Subject: [PATCH 26/30] fix up doctest --- src/pyhf/infer/calculators.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 1292a834b0..4cc14752c9 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -322,7 +322,7 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator( - ... data, model, qtilde=True + ... data, model, test_stat="qtilde" ... ) >>> q_tilde = asymptotic_calculator.teststatistic(mu_test) >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test) @@ -365,11 +365,11 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator( - ... data, model, qtilde=True + ... data, model, test_stat="qtilde" ... ) >>> _ = asymptotic_calculator.teststatistic(mu_test) >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test) - >>> CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist) + >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist) >>> CLs_exp_band [0.0026062609501074576, 0.01382005356161206, 0.06445320535890459, 0.23525643861460702, 0.573036205919389] @@ -386,14 +386,21 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): """ # Calling pvalues is easier then repeating the CLs calculation here tb, _ = get_backend() - return zip( - *[ - self.pvalues(test_stat, sig_plus_bkg_distribution, b_only_distribution) - for test_stat in [ - b_only_distribution.expected_value(n_sigma) - for n_sigma in [2, 1, 0, -1, -2] - ] - ] + return list( + map( + list, + zip( + *[ + self.pvalues( + test_stat, sig_plus_bkg_distribution, b_only_distribution + ) + for test_stat in [ + b_only_distribution.expected_value(n_sigma) + for n_sigma in [2, 1, 0, -1, -2] + ] + ] + ), + ) ) @@ -734,7 +741,7 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): ... data, model, ntoys=100, track_progress=False ... ) >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test) - >>> CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist) + >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist) >>> CLs_exp_band [0.0, 0.0, 0.06186224489795918, 0.2845003327965815, 1.0] From 169a46ca0d17be7934dbf6091d69ab5df20afcce Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Mon, 18 Jan 2021 21:09:14 -0500 Subject: [PATCH 27/30] update learn notebook for calculators to use the new API --- .../notebooks/learn/UsingCalculators.ipynb | 101 ++++++++++++++---- 1 file changed, 80 insertions(+), 21 deletions(-) diff --git a/docs/examples/notebooks/learn/UsingCalculators.ipynb b/docs/examples/notebooks/learn/UsingCalculators.ipynb index db7e9e0fbd..f78f33ef05 100644 --- a/docs/examples/notebooks/learn/UsingCalculators.ipynb +++ b/docs/examples/notebooks/learn/UsingCalculators.ipynb @@ -206,6 +206,66 @@ "p_expected" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, these sorts of steps are somewhat time-consuming and lengthy, and depending on the calculator chosen, may differ a little bit. The calculator API also serves to harmonize the extraction of the observed $p$-values:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CL_sb = 0.08389430261678055\n", + "CL_b = 0.5\n", + "CL_s = CL_sb / CL_b = 0.1677886052335611\n" + ] + } + ], + "source": [ + "p_sb, p_b, p_s = asymp_calc.pvalues(teststat, sb_dist, b_dist)\n", + "\n", + "print(f'CL_sb = {p_sb}')\n", + "print(f'CL_b = {p_b}')\n", + "print(f'CL_s = CL_sb / CL_b = {p_s}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "and the expected $p$-values:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "exp. CL_sb = [0.0003632946734314658, 0.008671732658283807, 0.08389430261678055, 0.3522160809113284, 0.7325868885677684]\n", + "exp. CL_b = [0.022750131948179195, 0.15865525393145707, 0.5, 0.8413447460685429, 0.9772498680518208]\n", + "exp. CL_s = CL_sb / CL_b = [0.01596890401598493, 0.05465770873260968, 0.1677886052335611, 0.4186346709326618, 0.7496413276864433]\n" + ] + } + ], + "source": [ + "p_exp_sb, p_exp_b, p_exp_s = asymp_calc.expected_pvalues(sb_dist, b_dist)\n", + "\n", + "print(f'exp. CL_sb = {p_exp_sb}')\n", + "print(f'exp. CL_b = {p_exp_b}')\n", + "print(f'exp. CL_s = CL_sb / CL_b = {p_exp_s}')" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -219,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -237,7 +297,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -255,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -264,7 +324,7 @@ "array(1.90259087)" ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -287,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -311,7 +371,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -325,9 +385,7 @@ } ], "source": [ - "p_sb = sb_dist.pvalue(teststat)\n", - "p_b = b_dist.pvalue(teststat)\n", - "p_s = p_sb / p_b\n", + "p_sb, p_b, p_s = asymp_calc.pvalues(teststat, sb_dist, b_dist)\n", "\n", "print(f'CL_sb = {p_sb}')\n", "print(f'CL_b = {p_b}')\n", @@ -343,24 +401,25 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[0.0, 0.047058823529411764, 0.15, 0.3758865248226951, 1.0]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "exp. CL_sb = [0.0, 0.008, 0.084, 0.318, 1.0]\n", + "exp. CL_b = [0.024, 0.17, 0.52, 0.846, 1.0]\n", + "exp. CL_s = CL_sb / CL_b = [0.0, 0.047058823529411764, 0.16153846153846155, 0.3758865248226951, 1.0]\n" + ] } ], "source": [ - "teststat_expected = [b_dist.expected_value(i) for i in [2, 1, 0, -1, -2]]\n", - "p_expected = [sb_dist.pvalue(t) / b_dist.pvalue(t) for t in teststat_expected]\n", - "p_expected" + "p_exp_sb, p_exp_b, p_exp_s = asymp_calc.expected_pvalues(sb_dist, b_dist)\n", + "\n", + "print(f'exp. CL_sb = {p_exp_sb}')\n", + "print(f'exp. CL_b = {p_exp_b}')\n", + "print(f'exp. CL_s = CL_sb / CL_b = {p_exp_s}')" ] } ], From d5b4a4b328cf968ef6cc22bc82b019c4661580e4 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 20 Jan 2021 01:32:58 -0600 Subject: [PATCH 28/30] Use bkg_only to be consistent with sig_plus_bkg naming --- src/pyhf/infer/__init__.py | 6 +++--- src/pyhf/infer/calculators.py | 28 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py index a45d782cc1..36449407b5 100644 --- a/src/pyhf/infer/__init__.py +++ b/src/pyhf/infer/__init__.py @@ -139,17 +139,17 @@ def hypotest( ) teststat = calc.teststatistic(poi_test) - sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test) + sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(poi_test) tb, _ = get_backend() CLsb_obs, CLb_obs, CLs_obs = tuple( tb.astensor(pvalue) for pvalue in calc.pvalues( - teststat, sig_plus_bkg_distribution, b_only_distribution + teststat, sig_plus_bkg_distribution, bkg_only_distribution ) ) CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues( - sig_plus_bkg_distribution, b_only_distribution + sig_plus_bkg_distribution, bkg_only_distribution ) is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0' diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 4cc14752c9..6cd398e85e 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -306,7 +306,7 @@ def _false_case(): ) return teststat - def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): + def pvalues(self, teststat, sig_plus_bkg_distribution, bkg_only_distribution): r""" Calculate the :math:`p`-values for the observed test statistic under the signal + background and background-only model hypotheses. @@ -334,7 +334,7 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): teststat (:obj:`tensor`): The test statistic. sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distribution for the signal + background hypothesis. - b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distribution for the background-only hypothesis. Returns: @@ -343,11 +343,11 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. """ CLsb = sig_plus_bkg_distribution.pvalue(teststat) - CLb = b_only_distribution.pvalue(teststat) + CLb = bkg_only_distribution.pvalue(teststat) CLs = CLsb / CLb return CLsb, CLb, CLs - def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): + def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution): r""" Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the median significance of variations of the signal strength from the @@ -376,7 +376,7 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): Args: sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distribution for the signal + background hypothesis. - b_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): + bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distribution for the background-only hypothesis. Returns: @@ -392,10 +392,10 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): zip( *[ self.pvalues( - test_stat, sig_plus_bkg_distribution, b_only_distribution + test_stat, sig_plus_bkg_distribution, bkg_only_distribution ) for test_stat in [ - b_only_distribution.expected_value(n_sigma) + bkg_only_distribution.expected_value(n_sigma) for n_sigma in [2, 1, 0, -1, -2] ] ] @@ -675,7 +675,7 @@ def distributions(self, poi_test, track_progress=None): b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat)) return s_plus_b, b_only - def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): + def pvalues(self, teststat, sig_plus_bkg_distribution, bkg_only_distribution): r""" Calculate the :math:`p`-values for the observed test statistic under the signal + background and background-only model hypotheses. @@ -705,7 +705,7 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): teststat (:obj:`tensor`): The test statistic. sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the signal + background hypothesis. - b_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution): + bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the background-only hypothesis. Returns: @@ -714,11 +714,11 @@ def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution): :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. """ CLsb = sig_plus_bkg_distribution.pvalue(teststat) - CLb = b_only_distribution.pvalue(teststat) + CLb = bkg_only_distribution.pvalue(teststat) CLs = CLsb / CLb return CLsb, CLb, CLs - def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): + def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution): r""" Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the median significance of variations of the signal strength from the @@ -748,7 +748,7 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): Args: sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the signal + background hypothesis. - b_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution): + bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the background-only hypothesis. Returns: @@ -762,9 +762,9 @@ def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution): self.pvalues( tb.astensor(test_stat), sig_plus_bkg_distribution, - b_only_distribution, + bkg_only_distribution, ) - for test_stat in b_only_distribution.samples + for test_stat in bkg_only_distribution.samples ] ) # TODO: Add percentile to tensorlib From b8207b9df0697dd2c4db0231221c1c990dbd71ff Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 20 Jan 2021 01:41:42 -0600 Subject: [PATCH 29/30] Uncomment pyhf bits from validation scripts --- validation/StandardHypoTestDemo.py | 2 +- validation/run_toys.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/validation/StandardHypoTestDemo.py b/validation/StandardHypoTestDemo.py index 89a4fb8cac..53f52938a0 100644 --- a/validation/StandardHypoTestDemo.py +++ b/validation/StandardHypoTestDemo.py @@ -121,4 +121,4 @@ def pyhf_version(ntoys=5000, seed=0): StandardHypoTestDemo( infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 ) - # pyhf_version() + pyhf_version() diff --git a/validation/run_toys.py b/validation/run_toys.py index c8f6b6bb7f..6941797403 100644 --- a/validation/run_toys.py +++ b/validation/run_toys.py @@ -4,6 +4,7 @@ import pyhf import pyhf.contrib.viz.brazil as brazil import ROOT +import sys def run_toys_ROOT(infile, ntoys): @@ -100,8 +101,7 @@ def run_toys_pyhf(ntoys=2_000, seed=0): if __name__ == "__main__": - # import sys - # run_toys_ROOT( - # infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 - # ) - run_toys_pyhf(ntoys=20_000) + run_toys_ROOT( + infile=sys.argv[1], ntoys=int(sys.argv[2]) if len(sys.argv) > 2 else 2000 + ) + run_toys_pyhf(ntoys=2_000) From 26ada72bc35d46f3746ef8a8aecf0755d4d779b1 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 20 Jan 2021 02:24:33 -0600 Subject: [PATCH 30/30] Make more explicit the percentiles are for Normal dist --- src/pyhf/infer/calculators.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py index 6cd398e85e..49aa11edb3 100644 --- a/src/pyhf/infer/calculators.py +++ b/src/pyhf/infer/calculators.py @@ -771,10 +771,11 @@ def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution): # c.f. Issue #815, PR #817 import numpy as np - _percentiles = [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681] + # percentiles for -2, -1, 0, 1, 2 standard deviations of the Normal distribution + normal_percentiles = [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681] pvalues_exp_band = np.percentile( tb.tolist(pvalues), - _percentiles, + normal_percentiles, axis=0, ).T.tolist() return pvalues_exp_band