Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moved to ordered collections #936

Merged
merged 2 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions python/Datacard.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import print_function

import six
from collections import OrderedDict


class Datacard:
Expand All @@ -16,57 +17,57 @@ def __init__(self):
## list of [bins in datacard]
self.bins = []
## dict of {bin : number of observed events}
self.obs = {}
self.obs = OrderedDict()
## list of [processes]
self.processes = []
## list of [signal processes]
self.signals = []
## dict of {processes : boolean to indicate whether process is signal or not}
self.isSignal = {}
self.isSignal = OrderedDict()
## list of [(bin, process, boolean to indicate whether process is signal or not)]
self.keyline = []
## dict of {bin : {process : yield}}
self.exp = {}
self.exp = OrderedDict()
## list of [(name of uncert, boolean to indicate whether to float this nuisance or not, type, list of what additional arguments (e.g. for gmN), keyline element)]
self.systs = []
## list of [{bin : {process : [input file, path to shape, path to shape for uncertainty]}}]
self.shapeMap = {}
self.shapeMap = OrderedDict()
## boolean that indicates whether the datacard contains shapes or not
self.hasShapes = False
## dict of {name of uncert, boolean to indicate whether it is a flat parametric uncertainty or not}
self.flatParamNuisances = {}
self.flatParamNuisances = OrderedDict()
## dict of rateParam, key is f"{bin}AND{process}", per bin/process they are a list
self.rateParams = {}
self.rateParams = OrderedDict()
## dict of extArgs
self.extArgs = {}
self.extArgs = OrderedDict()
## maintain the names of rate modifiers
self.rateParamsOrder = set()
## dict of {name of uncert, boolean to indicate whether this nuisance is floating or not}
self.frozenNuisances = set()

# Allows for nuisance renaming of "shape" systematics
self.systematicsShapeMap = {}
self.systematicsShapeMap = OrderedDict()

# Allows for nuisance renaming of "param" systematics
self.systematicsParamMap = {}
self.systematicsParamMap = OrderedDict()

# Allow to pick out entry in self.systs.
self.systIDMap = {}
self.systIDMap = OrderedDict()

# Keep edits
self.nuisanceEditLines = []

# map of which bins should have automated Barlow-Beeston parameters
self.binParFlags = {}
self.binParFlags = OrderedDict()

self.groups = {}
self.groups = OrderedDict()
self.discretes = []

# list of parameters called _norm in user input workspace
self.pdfnorms = {}
self.pdfnorms = OrderedDict()

# collection of nuisances to auto-produce flat priors for
self.toCreateFlatParam = {}
self.toCreateFlatParam = OrderedDict()

def print_structure(self):
"""
Expand Down Expand Up @@ -170,7 +171,7 @@ def print_structure(self):
)

# map of which bins should have automated Barlow-Beeston parameters
self.binParFlags = {}
self.binParFlags = OrderedDict()

def list_of_bins(self):
"""
Expand Down Expand Up @@ -311,7 +312,7 @@ def renameNuisanceParameter(self, oldname, newname, process_list=[], channel_lis
for specific channels/processes, then you should specify a
process (list or leave empty for all) and channel (list or leave empty for all)
"""
existingclashes = {}
existingclashes = OrderedDict()
for lsyst, nofloat, pdf0, args0, errline0 in self.systs[:]:
if lsyst == newname: # found the nuisance exists
existingclashes[lsyst] = (nofloat, pdf0, args0, errline0)
Expand Down
16 changes: 9 additions & 7 deletions python/DatacardParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from HiggsAnalysis.CombinedLimit.Datacard import Datacard
from HiggsAnalysis.CombinedLimit.NuisanceModifier import doEditNuisance

from collections import OrderedDict

globalNuisances = re.compile("(lumi|pdf_(qqbar|gg|qg)|QCDscale_(ggH|qqH|VH|ggH1in|ggH2in|VV)|UEPS|FakeRate|CMS_(eff|fake|trigger|scale|res)_([gemtjb]|met))")


Expand Down Expand Up @@ -350,7 +352,7 @@ def parseCard(file, options):

# resetting these here to defaults, parseCard will fill them up
ret.discretes = []
ret.groups = {}
ret.groups = OrderedDict()

#
nbins = -1
Expand Down Expand Up @@ -386,7 +388,7 @@ def parseCard(file, options):
if len(f) < 4:
raise RuntimeError("Malformed shapes line")
if f[2] not in ret.shapeMap:
ret.shapeMap[f[2]] = {}
ret.shapeMap[f[2]] = OrderedDict()
if f[1] in ret.shapeMap[f[2]]:
raise RuntimeError("Duplicate definition for process '%s', channel '%s'" % (f[1], f[2]))
ret.shapeMap[f[2]][f[1]] = f[3:]
Expand All @@ -404,7 +406,7 @@ def parseCard(file, options):
if len(binline) != len(ret.obs):
raise RuntimeError("Found %d bins (%s) but %d bins have been declared" % (len(ret.bins), ret.bins, nbins))
ret.bins = binline
ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
ret.obs = OrderedDict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
binline = []
if f[0] == "bin":
binline = []
Expand Down Expand Up @@ -445,10 +447,10 @@ def parseCard(file, options):
raise RuntimeError("Found %d processes (%s), declared jmax = %d" % (len(ret.processes), ret.processes, nprocesses))
if nbins != len(ret.bins):
raise RuntimeError("Found %d bins (%s), declared imax = %d" % (len(ret.bins), ret.bins, nbins))
ret.exp = dict([(b, {}) for b in ret.bins])
ret.isSignal = dict([(p, None) for p in ret.processes])
ret.exp = OrderedDict([(b, OrderedDict()) for b in ret.bins])
ret.isSignal = OrderedDict([(p, None) for p in ret.processes])
if ret.obs != [] and type(ret.obs) == list: # still as list, must change into map with bin names
ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
ret.obs = OrderedDict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
for b, p, s in ret.keyline:
if ret.isSignal[p] == None:
ret.isSignal[p] = s
Expand Down Expand Up @@ -631,7 +633,7 @@ def parseCard(file, options):
raise RuntimeError(
"Malformed systematics line %s of length %d: while bins and process lines have length %d" % (lsyst, len(numbers), len(ret.keyline))
)
errline = dict([(b, {}) for b in ret.bins])
errline = OrderedDict([(b, OrderedDict()) for b in ret.bins])
nonNullEntries = 0
for (b, p, s), r in zip(ret.keyline, numbers):
if "/" in r: # "number/number"
Expand Down
14 changes: 8 additions & 6 deletions python/ModelTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import six
from six.moves import range

from collections import OrderedDict

import ROOT

ROOFIT_EXPR = "expr"
Expand Down Expand Up @@ -54,7 +56,7 @@ def __init__(self, options):
self.out = ROOT.RooWorkspace("w", "w")
# self.out.safe_import = getattr(self.out,"import") # workaround: import is a python keyword
self.out.safe_import = SafeWorkspaceImporter(self.out)
self.objstore = {}
self.objstore = OrderedDict()
self.out.dont_delete = []
if options.verbose == 0:
ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR)
Expand Down Expand Up @@ -221,7 +223,7 @@ def runPostProcesses(self):
self.out.arg(n).setConstant(True)

def doExtArgs(self):
open_files = {}
open_files = OrderedDict()
for rp in self.DC.extArgs.keys():
if self.out.arg(rp):
continue
Expand Down Expand Up @@ -277,7 +279,7 @@ def doExtArgs(self):
def doRateParams(self):
# First support external functions/parameters
# keep a map of open files/workspaces
open_files = {}
open_files = OrderedDict()

for rp in self.DC.rateParams.keys():
for rk in range(len(self.DC.rateParams[rp])):
Expand Down Expand Up @@ -808,7 +810,7 @@ def doFillNuisPdfsAndSets(self):
if p != "constr":
nuisVars.add(self.out.var(c_param_name))
setNuisPdf.append(c_param_name)
setNuisPdf = set(setNuisPdf)
setNuisPdf = list(dict.fromkeys((setNuisPdf)))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran a few tests, and it seems that changing the sets to an ordered type on this line (and on line 826 below) is enough for exact reproducibility of the fit values on the datacards where we saw the issue when recreating the workspace, though it may be worth understanding this more clearly.

On the broader point, I agree with your suggestion, @nsmith- : it would be nice to make those kinds of changes.

for n in setNuisPdf:
nuisPdfs.add(self.out.pdf(n + "_Pdf"))
self.out.defineSet("nuisances", nuisVars)
Expand All @@ -821,7 +823,7 @@ def doFillNuisPdfsAndSets(self):
self.out.defineSet("globalObservables", gobsVars)
else: # doesn't work for too many nuisances :-(
# avoid duplicating _Pdf in list
setNuisPdf = set([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])
setNuisPdf = list(dict.fromkeys(keywords([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])))
self.doSet("nuisances", ",".join(["%s" % self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs]))
self.doObj("nuisancePdf", "PROD", ",".join(["%s_Pdf" % n for n in setNuisPdf]))
self.doSet("globalObservables", ",".join(self.globalobs))
Expand All @@ -846,7 +848,7 @@ def doAutoFlatNuisancePriors(self):

def doNuisancesGroups(self):
# Prepare a dictionary of which group a certain nuisance belongs to
groupsFor = {}
groupsFor = OrderedDict()
# existingNuisanceNames = tuple(set([syst[0] for syst in self.DC.systs]+self.DC.flatParamNuisances.keys()+self.DC.rateParams.keys()+self.DC.extArgs.keys()+self.DC.discretes))
existingNuisanceNames = self.DC.getAllVariables()
for groupName, nuisanceNames in six.iteritems(self.DC.groups):
Expand Down
6 changes: 4 additions & 2 deletions python/NuisanceModifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import sys
from math import exp, hypot, log

from collections import OrderedDict


def appendMap(tmap, k, thing):
if k in list(tmap.keys()):
Expand Down Expand Up @@ -77,7 +79,7 @@ def doAddNuisance(datacard, args):
cchannel = re.compile(channel.replace("+", r"\+"))
opts = args[5:]
found = False
errline = dict([(b, dict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
errline = OrderedDict([(b, OrderedDict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
for lsyst, nofloat, pdf0, args0, errline0 in datacard.systs:
if lsyst == name:
if pdf != pdf0:
Expand Down Expand Up @@ -226,7 +228,7 @@ def doRenameNuisance(datacard, args):
if pdf0 == "param":
continue
# for dcs in datacard.systs: print " --> ", dcs
errline2 = dict([(b, dict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
errline2 = OrderedDict([(b, OrderedDict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
found = False
if newname in list(datacard.systIDMap.keys()):
for id2 in datacard.systIDMap[newname]:
Expand Down
30 changes: 16 additions & 14 deletions python/ShapeTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import six
from six.moves import range

from collections import OrderedDict

import ROOT
from HiggsAnalysis.CombinedLimit.ModelTools import ModelBuilder

Expand All @@ -31,7 +33,7 @@ class FileCache:
def __init__(self, basedir, maxsize=250):
self._basedir = basedir
self._maxsize = maxsize
self._files = {}
self._files = OrderedDict()
self._hits = defaultdict(int)
self._total = 0

Expand Down Expand Up @@ -71,10 +73,10 @@ def __init__(self, datacard, options):
if options.libs:
for lib in options.libs:
ROOT.gSystem.Load(lib)
self.wspnames = {}
self.wspnames = OrderedDict()
self.wsp = None
self.extraImports = []
self.norm_rename_map = {}
self.norm_rename_map = OrderedDict()
self._fileCache = FileCache(self.options.baseDir)

## ------------------------------------------
Expand Down Expand Up @@ -468,12 +470,12 @@ def RenameDupObjs(self, dupObjs, dupNames, newObj, postFix):
## --------------------------------------
def prepareAllShapes(self):
shapeTypes = []
shapeBins = {}
shapeObs = {}
self.pdfModes = {}
shapeBins = OrderedDict()
shapeObs = OrderedDict()
self.pdfModes = OrderedDict()
for ib, b in enumerate(self.DC.bins):
databins = {}
bgbins = {}
databins = OrderedDict()
bgbins = OrderedDict()
channelBinParFlag = b in list(self.DC.binParFlags.keys())
for p in [self.options.dataname] + list(self.DC.exp[b].keys()):
if len(self.DC.obs) == 0 and p == self.options.dataname:
Expand Down Expand Up @@ -561,7 +563,7 @@ def prepareAllShapes(self):
if i not in bgbins:
stderr.write("Channel %s has bin %d filled in data but empty in all backgrounds\n" % (b, i))
if shapeTypes.count("TH1"):
self.TH1Observables = {}
self.TH1Observables = OrderedDict()
self.out.binVars = ROOT.RooArgSet()
self.out.maxbins = max([shapeBins[k] for k in shapeBins.keys()])
if self.options.optimizeTemplateBins:
Expand Down Expand Up @@ -662,7 +664,7 @@ def doCombinedDataset(self):
## -------------------------------------
## -------- Low level helpers ----------
## -------------------------------------
def getShape(self, channel, process, syst="", _cache={}, allowNoSyst=False):
def getShape(self, channel, process, syst="", _cache=OrderedDict(), allowNoSyst=False):
if (channel, process, syst) in _cache:
if self.options.verbose > 2:
print(
Expand Down Expand Up @@ -851,10 +853,10 @@ def getShape(self, channel, process, syst="", _cache={}, allowNoSyst=False):
_cache[(channel, process, syst)] = ret
return ret

def getData(self, channel, process, syst="", _cache={}):
def getData(self, channel, process, syst="", _cache=OrderedDict()):
return self.shape2Data(self.getShape(channel, process, syst), channel, process)

def getPdf(self, channel, process, _cache={}):
def getPdf(self, channel, process, _cache=OrderedDict()):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
if (channel, process) in _cache:
return _cache[(channel, process)]
Expand Down Expand Up @@ -1212,7 +1214,7 @@ def rebinH1(self, shape):
rebinh1._original_bins = shapeNbins
return rebinh1

def shape2Data(self, shape, channel, process, _cache={}):
def shape2Data(self, shape, channel, process, _cache=OrderedDict()):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
if shape == None:
name = "shape%s_%s_%s" % (postFix, channel, process)
Expand Down Expand Up @@ -1248,7 +1250,7 @@ def shape2Data(self, shape, channel, process, _cache={}):
raise RuntimeError("shape2Data not implemented for %s" % shape.ClassName())
return _cache[shape.GetName()]

def shape2Pdf(self, shape, channel, process, _cache={}):
def shape2Pdf(self, shape, channel, process, _cache=OrderedDict()):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
channelBinParFlag = channel in list(self.DC.binParFlags.keys())
if shape == None:
Expand Down
Loading