Skip to content

Commit

Permalink
better fix
Browse files Browse the repository at this point in the history
  • Loading branch information
lukasheinrich committed Nov 8, 2020
1 parent d954274 commit 4f79f05
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 76 deletions.
39 changes: 11 additions & 28 deletions src/pyhf/infer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,42 +146,25 @@ def hypotest(
teststat = calc.teststatistic(poi_test)
sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)

CLsb = sig_plus_bkg_distribution.pvalue(teststat)
CLb = b_only_distribution.pvalue(teststat)
CLs = CLsb / CLb

tensorlib, _ = get_backend()
# Ensure that all CL values are 0-d tensors
CLsb, CLb, CLs = (
tensorlib.astensor(CLsb),
tensorlib.astensor(CLb),
tensorlib.astensor(CLs),
pvalues = CLsb_obs, CLb_obs, CLs_obs = calc.pvalues(
teststat, sig_plus_bkg_distribution, b_only_distribution
)
tb, _ = get_backend()
CLsb_obs, CLb_obs, CLs_obs = tuple(tb.astensor(x) for x in pvalues)
CLs_exp = list(
tb.astensor(x)
for x in calc.expected_pvalues(sig_plus_bkg_distribution, b_only_distribution)
)

_returns = [CLs]
_returns = [CLs_obs]
if return_tail_probs:
_returns.append([CLsb, CLb])
_returns.append([CLsb_obs, CLb_obs])
if return_expected_set:
CLs_exp = []
for n_sigma in [2, 1, 0, -1, -2]:

expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)

CLs = sig_plus_bkg_distribution.pvalue(
expected_bonly_teststat
) / b_only_distribution.pvalue(expected_bonly_teststat)
CLs_exp.append(tensorlib.astensor(CLs))
if return_expected:
_returns.append(CLs_exp[2])
_returns.append(CLs_exp)
elif return_expected:
n_sigma = 0
expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)

CLs = sig_plus_bkg_distribution.pvalue(
expected_bonly_teststat
) / b_only_distribution.pvalue(expected_bonly_teststat)
_returns.append(tensorlib.astensor(CLs))
_returns.append(CLs_exp[2])
# Enforce a consistent return type of the observed CLs
return tuple(_returns) if len(_returns) > 1 else _returns[0]

Expand Down
42 changes: 41 additions & 1 deletion src/pyhf/infer/calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,18 @@ def _false_case():
)
return teststat

def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
CLsb = sig_plus_bkg_distribution.pvalue(teststat)
CLb = b_only_distribution.pvalue(teststat)
CLs = CLsb / CLb
return CLsb, CLb, CLs

def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
return [
self.pvalues(v, sig_plus_bkg_distribution, b_only_distribution)[-1]
for v in [b_only_distribution.expected_value(x) for x in [2, 1, 0, -1, -2]]
]


class EmpiricalDistribution:
"""
Expand Down Expand Up @@ -375,7 +387,11 @@ def pvalue(self, value):
"""
tensorlib, _ = get_backend()
return (
tensorlib.sum(tensorlib.where(self.samples >= value, 1, 0))
tensorlib.sum(
tensorlib.where(
self.samples >= value, tensorlib.astensor(1), tensorlib.astensor(0)
)
)
/ tensorlib.shape(self.samples)[0]
)

Expand Down Expand Up @@ -566,6 +582,30 @@ def distributions(self, poi_test, track_progress=None):
b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat))
return s_plus_b, b_only

def pvalues(self, teststat, sig_plus_bkg_distribution, b_only_distribution):
CLsb = sig_plus_bkg_distribution.pvalue(teststat)
CLb = b_only_distribution.pvalue(teststat)
CLs = CLsb / CLb
return CLsb, CLb, CLs

def expected_pvalues(self, sig_plus_bkg_distribution, b_only_distribution):
tb, _ = get_backend()
pvalues = tb.astensor(
[
self.pvalues(
tb.astensor(x), sig_plus_bkg_distribution, b_only_distribution
)[-1]
for x in b_only_distribution.samples
]
)
import numpy as np

CLs_exp = np.percentile(
tb.tolist(pvalues),
[2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681],
)
return tb.astensor(CLs_exp)

def teststatistic(self, poi_test):
"""
Compute the test statistic for the observed data under the studied model.
Expand Down
1 change: 1 addition & 0 deletions src/pyhf/infer/intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def upperlimit(data, model, scan, level=0.05, return_results=False):
]
obs = tb.astensor([[r[0]] for r in results])
exp = tb.astensor([[r[1][idx] for idx in range(5)] for r in results])

result_arrary = tb.concatenate([obs, exp], axis=1).T

# observed limit and the (0, +-1, +-2)sigma expected limits
Expand Down
18 changes: 9 additions & 9 deletions tests/test_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,16 +343,16 @@ def test_toy_calculator(tmpdir, hypotest_args):
)
assert qtilde_mu_bkg.samples.tolist() == pytest.approx(
[
2.2664625238298868,
1.081660885985002,
2.757022090389057,
1.3835689306226868,
0.4707466955809423,
2.2664625749100082,
1.081660887453154,
2.7570218408936853,
1.3835691388297846,
0.4707467005909507,
0.0,
3.7166483694865065,
3.8021896741039427,
5.114135403520493,
1.35111537136072,
3.7166483705294127,
3.8021896732709592,
5.114135391143066,
1.3511153731000718,
],
1e-07,
)
Expand Down
99 changes: 61 additions & 38 deletions validation/StandardHypoTestDemo.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,78 @@
import ROOT
import numpy as np


def StandardHypoTestDemo(
infile,
ntoys,
workspaceName = "combined",
modelSBName = "ModelConfig",
dataName = "obsData",
workspaceName="combined",
modelSBName="ModelConfig",
dataName="obsData",
):

file = ROOT.TFile.Open(infile);
w = file.Get(workspaceName);
sbModel = w.obj(modelSBName);
data = w.data(dataName);

bModel = sbModel.Clone();
bModel.SetName(modelSBName+"B_only");
var = (bModel.GetParametersOfInterest().first());
oldval = var.getVal();
var.setVal(0);
bModel.SetSnapshot( ROOT.RooArgSet(var) );
var.setVal(oldval);

var = sbModel.GetParametersOfInterest().first();
oldval = var.getVal();
sbModel.SetSnapshot( ROOT.RooArgSet(var) );

profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf());
profll.SetOneSidedDiscovery(False);
profll.SetOneSided(True);
hypoCalc = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel);
# profll.SetPrintLevel(2)
file = ROOT.TFile.Open(infile)
w = file.Get(workspaceName)
sbModel = w.obj(modelSBName)
data = w.data(dataName)

bModel = sbModel.Clone()
bModel.SetName(modelSBName + "B_only")
var = bModel.GetParametersOfInterest().first()
oldval = var.getVal()
var.setVal(0)
bModel.SetSnapshot(ROOT.RooArgSet(var))
var.setVal(oldval)

var = sbModel.GetParametersOfInterest().first()
oldval = var.getVal()
sbModel.SetSnapshot(ROOT.RooArgSet(var))

print('by hand',profll.Evaluate(data,ROOT.RooArgSet(var) ))
profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf())
profll.SetOneSidedDiscovery(False)
profll.SetOneSided(True)
hypoCalc = ROOT.RooStats.FrequentistCalculator(data, bModel, sbModel)
# profll.SetPrintLevel(2)

print('by hand', profll.Evaluate(data, ROOT.RooArgSet(var)))

hypoCalc.SetToys(ntoys, ntoys)

sampler = hypoCalc.GetTestStatSampler();
sampler.SetTestStatistic(profll);
sampler = hypoCalc.GetTestStatSampler()
sampler.SetTestStatistic(profll)

paramPointNull = ROOT.RooArgSet(sbModel.GetParametersOfInterest())

htr = hypoCalc.GetHypoTest();
htr.SetPValueIsRightTail(True);
htr.SetBackgroundAsAlt(True);
htr.Print();
plot = ROOT.RooStats.HypoTestPlot(htr,100);
plot.SetLogYaxis(True);
plot.Draw();
ROOT.gPad.SaveAs("plot.png");

htr = hypoCalc.GetHypoTest()
htr.SetPValueIsRightTail(True)
htr.SetBackgroundAsAlt(True)

cls_obs = htr.CLs()
ds = htr.GetNullDistribution()
db = htr.GetAltDistribution()

r = ROOT.RooStats.HypoTestResult()
r.SetPValueIsRightTail(True)
r.SetNullDistribution(ds)
r.SetAltDistribution(db)

values = [
1 / (r.SetTestStatisticData(v), r.CLs())[-1]
for v in ds.GetSamplingDistribution()
]
print(values)
values = np.percentile(
values, [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681]
)
print(values)

htr.Print()
plot = ROOT.RooStats.HypoTestPlot(htr, 100)
plot.SetLogYaxis(True)
plot.Draw()
ROOT.gPad.SaveAs("plot.png")


import sys

if __name__ == '__main__':
StandardHypoTestDemo(sys.argv[1],int(sys.argv[2]) if len(sys.argv) > 2 else 2000)
StandardHypoTestDemo(sys.argv[1], int(sys.argv[2]) if len(sys.argv) > 2 else 2000)

0 comments on commit 4f79f05

Please sign in to comment.