From c1b4bd4cd89911e8a4bfa60c15f1852478b12fcb Mon Sep 17 00:00:00 2001 From: Nick Wardle Date: Thu, 11 Apr 2024 12:05:55 +0200 Subject: [PATCH 1/2] Moved to ordered collections Main fix needed is in ModelTools `setNuisPdf = set(setNuisPdf)` -> `setNuisPdf = list(dict.fromkeys((setNuisPdf)))` --- python/Datacard.py | 34 +++++++++++++++++----------------- python/DatacardParser.py | 16 +++++++++------- python/ModelTools.py | 14 ++++++++------ python/NuisanceModifier.py | 5 +++-- python/ShapeTools.py | 30 ++++++++++++++++-------------- 5 files changed, 53 insertions(+), 46 deletions(-) diff --git a/python/Datacard.py b/python/Datacard.py index 70df8121503..ec2bec9bba0 100644 --- a/python/Datacard.py +++ b/python/Datacard.py @@ -1,7 +1,7 @@ from __future__ import print_function import six - +from collections import OrderedDict class Datacard: """ @@ -16,57 +16,57 @@ def __init__(self): ## list of [bins in datacard] self.bins = [] ## dict of {bin : number of observed events} - self.obs = {} + self.obs = OrderedDict() ## list of [processes] self.processes = [] ## list of [signal processes] self.signals = [] ## dict of {processes : boolean to indicate whether process is signal or not} - self.isSignal = {} + self.isSignal = OrderedDict() ## list of [(bin, process, boolean to indicate whether process is signal or not)] self.keyline = [] ## dict of {bin : {process : yield}} - self.exp = {} + self.exp = OrderedDict() ## list of [(name of uncert, boolean to indicate whether to float this nuisance or not, type, list of what additional arguments (e.g. 
for gmN), keyline element)] self.systs = [] ## list of [{bin : {process : [input file, path to shape, path to shape for uncertainty]}}] - self.shapeMap = {} + self.shapeMap = OrderedDict() ## boolean that indicates whether the datacard contains shapes or not self.hasShapes = False ## dirct of {name of uncert, boolean to indicate whether it is a flat parametric uncertainty or not} - self.flatParamNuisances = {} + self.flatParamNuisances = OrderedDict() ## dict of rateParam, key is f"{bin}AND{process}", per bin/process they are a list - self.rateParams = {} + self.rateParams = OrderedDict() ## dict of extArgs - self.extArgs = {} + self.extArgs = OrderedDict() ## maintain the names of rate modifiers self.rateParamsOrder = set() ## dirct of {name of uncert, boolean to indicate whether this nuisance is floating or not} self.frozenNuisances = set() # Allows for nuisance renaming of "shape" systematics - self.systematicsShapeMap = {} + self.systematicsShapeMap = OrderedDict() # Allows for nuisance renaming of "param" systematics - self.systematicsParamMap = {} + self.systematicsParamMap = OrderedDict() # Allow to pick out entry in self.systs. 
- self.systIDMap = {} + self.systIDMap = OrderedDict() # Keep edits self.nuisanceEditLines = [] # map of which bins should have automated Barlow-Beeston parameters - self.binParFlags = {} + self.binParFlags = OrderedDict() - self.groups = {} + self.groups = OrderedDict() self.discretes = [] # list of parameters called _norm in user input workspace - self.pdfnorms = {} + self.pdfnorms = OrderedDict() # collection of nuisances to auto-produce flat priors for - self.toCreateFlatParam = {} + self.toCreateFlatParam = OrderedDict() def print_structure(self): """ @@ -170,7 +170,7 @@ def print_structure(self): ) # map of which bins should have automated Barlow-Beeston parameters - self.binParFlags = {} + self.binParFlags = OrderedDict() def list_of_bins(self): """ @@ -311,7 +311,7 @@ def renameNuisanceParameter(self, oldname, newname, process_list=[], channel_lis for specific channels/processes, then you should specify a process (list or leave empty for all) and channel (list or leave empty for all) """ - existingclashes = {} + existingclashes = OrderedDict() for lsyst, nofloat, pdf0, args0, errline0 in self.systs[:]: if lsyst == newname: # found the nuisance exists existingclashes[lsyst] = (nofloat, pdf0, args0, errline0) diff --git a/python/DatacardParser.py b/python/DatacardParser.py index 88b805009e4..234f4312c54 100644 --- a/python/DatacardParser.py +++ b/python/DatacardParser.py @@ -9,6 +9,8 @@ from HiggsAnalysis.CombinedLimit.Datacard import Datacard from HiggsAnalysis.CombinedLimit.NuisanceModifier import doEditNuisance +from collections import OrderedDict + globalNuisances = re.compile("(lumi|pdf_(qqbar|gg|qg)|QCDscale_(ggH|qqH|VH|ggH1in|ggH2in|VV)|UEPS|FakeRate|CMS_(eff|fake|trigger|scale|res)_([gemtjb]|met))") @@ -350,7 +352,7 @@ def parseCard(file, options): # resetting these here to defaults, parseCard will fill them up ret.discretes = [] - ret.groups = {} + ret.groups = OrderedDict() # nbins = -1 @@ -386,7 +388,7 @@ def parseCard(file, options): if len(f) < 
4: raise RuntimeError("Malformed shapes line") if f[2] not in ret.shapeMap: - ret.shapeMap[f[2]] = {} + ret.shapeMap[f[2]] = OrderedDict() if f[1] in ret.shapeMap[f[2]]: raise RuntimeError("Duplicate definition for process '%s', channel '%s'" % (f[1], f[2])) ret.shapeMap[f[2]][f[1]] = f[3:] @@ -404,7 +406,7 @@ def parseCard(file, options): if len(binline) != len(ret.obs): raise RuntimeError("Found %d bins (%s) but %d bins have been declared" % (len(ret.bins), ret.bins, nbins)) ret.bins = binline - ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)]) + ret.obs = OrderedDict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)]) binline = [] if f[0] == "bin": binline = [] @@ -445,10 +447,10 @@ def parseCard(file, options): raise RuntimeError("Found %d processes (%s), declared jmax = %d" % (len(ret.processes), ret.processes, nprocesses)) if nbins != len(ret.bins): raise RuntimeError("Found %d bins (%s), declared imax = %d" % (len(ret.bins), ret.bins, nbins)) - ret.exp = dict([(b, {}) for b in ret.bins]) - ret.isSignal = dict([(p, None) for p in ret.processes]) + ret.exp = OrderedDict([(b, OrderedDict()) for b in ret.bins]) + ret.isSignal = OrderedDict([(p, None) for p in ret.processes]) if ret.obs != [] and type(ret.obs) == list: # still as list, must change into map with bin names - ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)]) + ret.obs = OrderedDict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)]) for b, p, s in ret.keyline: if ret.isSignal[p] == None: ret.isSignal[p] = s @@ -631,7 +633,7 @@ def parseCard(file, options): raise RuntimeError( "Malformed systematics line %s of length %d: while bins and process lines have length %d" % (lsyst, len(numbers), len(ret.keyline)) ) - errline = dict([(b, {}) for b in ret.bins]) + errline = OrderedDict([(b, OrderedDict()) for b in ret.bins]) nonNullEntries = 0 for (b, p, s), r in zip(ret.keyline, numbers): if "/" in r: # "number/number" diff --git a/python/ModelTools.py b/python/ModelTools.py 
index 6b0101693c2..771bbc45e32 100644 --- a/python/ModelTools.py +++ b/python/ModelTools.py @@ -10,6 +10,8 @@ import six from six.moves import range +from collections import OrderedDict + import ROOT ROOFIT_EXPR = "expr" @@ -54,7 +56,7 @@ def __init__(self, options): self.out = ROOT.RooWorkspace("w", "w") # self.out.safe_import = getattr(self.out,"import") # workaround: import is a python keyword self.out.safe_import = SafeWorkspaceImporter(self.out) - self.objstore = {} + self.objstore = OrderedDict() self.out.dont_delete = [] if options.verbose == 0: ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR) @@ -221,7 +223,7 @@ def runPostProcesses(self): self.out.arg(n).setConstant(True) def doExtArgs(self): - open_files = {} + open_files = OrderedDict() for rp in self.DC.extArgs.keys(): if self.out.arg(rp): continue @@ -277,7 +279,7 @@ def doExtArgs(self): def doRateParams(self): # First support external functions/parameters # keep a map of open files/workspaces - open_files = {} + open_files = OrderedDict() for rp in self.DC.rateParams.keys(): for rk in range(len(self.DC.rateParams[rp])): @@ -808,7 +810,7 @@ def doFillNuisPdfsAndSets(self): if p != "constr": nuisVars.add(self.out.var(c_param_name)) setNuisPdf.append(c_param_name) - setNuisPdf = set(setNuisPdf) + setNuisPdf = list(dict.fromkeys((setNuisPdf))) for n in setNuisPdf: nuisPdfs.add(self.out.pdf(n + "_Pdf")) self.out.defineSet("nuisances", nuisVars) @@ -821,7 +823,7 @@ def doFillNuisPdfsAndSets(self): self.out.defineSet("globalObservables", gobsVars) else: # doesn't work for too many nuisances :-( # avoid duplicating _Pdf in list - setNuisPdf = set([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs]) + setNuisPdf = list(dict.fromkeys(keywords([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs]))) self.doSet("nuisances", ",".join(["%s" % self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])) self.doObj("nuisancePdf", "PROD", ",".join(["%s_Pdf" % n for n in 
setNuisPdf])) self.doSet("globalObservables", ",".join(self.globalobs)) @@ -846,7 +848,7 @@ def doAutoFlatNuisancePriors(self): def doNuisancesGroups(self): # Prepare a dictionary of which group a certain nuisance belongs to - groupsFor = {} + groupsFor = OrderedDict() # existingNuisanceNames = tuple(set([syst[0] for syst in self.DC.systs]+self.DC.flatParamNuisances.keys()+self.DC.rateParams.keys()+self.DC.extArgs.keys()+self.DC.discretes)) existingNuisanceNames = self.DC.getAllVariables() for groupName, nuisanceNames in six.iteritems(self.DC.groups): diff --git a/python/NuisanceModifier.py b/python/NuisanceModifier.py index e97e7f60fe6..02ea5a43b57 100644 --- a/python/NuisanceModifier.py +++ b/python/NuisanceModifier.py @@ -4,6 +4,7 @@ import sys from math import exp, hypot, log +from collections import OrderedDict def appendMap(tmap, k, thing): if k in list(tmap.keys()): @@ -77,7 +78,7 @@ def doAddNuisance(datacard, args): cchannel = re.compile(channel.replace("+", r"\+")) opts = args[5:] found = False - errline = dict([(b, dict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins]) + errline = OrderedDict([(b, OrderedDict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins]) for lsyst, nofloat, pdf0, args0, errline0 in datacard.systs: if lsyst == name: if pdf != pdf0: @@ -226,7 +227,7 @@ def doRenameNuisance(datacard, args): if pdf0 == "param": continue # for dcs in datacard.systs: print " --> ", dcs - errline2 = dict([(b, dict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins]) + errline2 = OrderedDict([(b, OrderedDict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins]) found = False if newname in list(datacard.systIDMap.keys()): for id2 in datacard.systIDMap[newname]: diff --git a/python/ShapeTools.py b/python/ShapeTools.py index f6fad3e57f9..3eaaf7011dc 100644 --- a/python/ShapeTools.py +++ b/python/ShapeTools.py @@ -9,6 +9,8 @@ import six from six.moves import range +from collections import OrderedDict + import ROOT from 
HiggsAnalysis.CombinedLimit.ModelTools import ModelBuilder @@ -31,7 +33,7 @@ class FileCache: def __init__(self, basedir, maxsize=250): self._basedir = basedir self._maxsize = maxsize - self._files = {} + self._files = OrderedDict() self._hits = defaultdict(int) self._total = 0 @@ -71,10 +73,10 @@ def __init__(self, datacard, options): if options.libs: for lib in options.libs: ROOT.gSystem.Load(lib) - self.wspnames = {} + self.wspnames = OrderedDict() self.wsp = None self.extraImports = [] - self.norm_rename_map = {} + self.norm_rename_map = OrderedDict() self._fileCache = FileCache(self.options.baseDir) ## ------------------------------------------ @@ -468,12 +470,12 @@ def RenameDupObjs(self, dupObjs, dupNames, newObj, postFix): ## -------------------------------------- def prepareAllShapes(self): shapeTypes = [] - shapeBins = {} - shapeObs = {} - self.pdfModes = {} + shapeBins = OrderedDict() + shapeObs = OrderedDict() + self.pdfModes = OrderedDict() for ib, b in enumerate(self.DC.bins): - databins = {} - bgbins = {} + databins = OrderedDict() + bgbins = OrderedDict() channelBinParFlag = b in list(self.DC.binParFlags.keys()) for p in [self.options.dataname] + list(self.DC.exp[b].keys()): if len(self.DC.obs) == 0 and p == self.options.dataname: @@ -561,7 +563,7 @@ def prepareAllShapes(self): if i not in bgbins: stderr.write("Channel %s has bin %d filled in data but empty in all backgrounds\n" % (b, i)) if shapeTypes.count("TH1"): - self.TH1Observables = {} + self.TH1Observables = OrderedDict() self.out.binVars = ROOT.RooArgSet() self.out.maxbins = max([shapeBins[k] for k in shapeBins.keys()]) if self.options.optimizeTemplateBins: @@ -662,7 +664,7 @@ def doCombinedDataset(self): ## ------------------------------------- ## -------- Low level helpers ---------- ## ------------------------------------- - def getShape(self, channel, process, syst="", _cache={}, allowNoSyst=False): + def getShape(self, channel, process, syst="", _cache=OrderedDict(), 
allowNoSyst=False): if (channel, process, syst) in _cache: if self.options.verbose > 2: print( @@ -851,10 +853,10 @@ def getShape(self, channel, process, syst="", _cache={}, allowNoSyst=False): _cache[(channel, process, syst)] = ret return ret - def getData(self, channel, process, syst="", _cache={}): + def getData(self, channel, process, syst="", _cache=OrderedDict()): return self.shape2Data(self.getShape(channel, process, syst), channel, process) - def getPdf(self, channel, process, _cache={}): + def getPdf(self, channel, process, _cache=OrderedDict()): postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg" if (channel, process) in _cache: return _cache[(channel, process)] @@ -1212,7 +1214,7 @@ def rebinH1(self, shape): rebinh1._original_bins = shapeNbins return rebinh1 - def shape2Data(self, shape, channel, process, _cache={}): + def shape2Data(self, shape, channel, process, _cache=OrderedDict()): postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg" if shape == None: name = "shape%s_%s_%s" % (postFix, channel, process) @@ -1248,7 +1250,7 @@ def shape2Data(self, shape, channel, process, _cache={}): raise RuntimeError("shape2Data not implemented for %s" % shape.ClassName()) return _cache[shape.GetName()] - def shape2Pdf(self, shape, channel, process, _cache={}): + def shape2Pdf(self, shape, channel, process, _cache=OrderedDict()): postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg" channelBinParFlag = channel in list(self.DC.binParFlags.keys()) if shape == None: From 8209cb594a93306336d28031026307f3070704ee Mon Sep 17 00:00:00 2001 From: Nick Wardle Date: Thu, 11 Apr 2024 12:14:02 +0200 Subject: [PATCH 2/2] Just formatting for python --- python/Datacard.py | 1 + python/ModelTools.py | 2 +- python/NuisanceModifier.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/Datacard.py b/python/Datacard.py index ec2bec9bba0..5f1cc2d1fec 
100644 --- a/python/Datacard.py +++ b/python/Datacard.py @@ -3,6 +3,7 @@ import six from collections import OrderedDict + class Datacard: """ Description: diff --git a/python/ModelTools.py b/python/ModelTools.py index 771bbc45e32..fbb822750dd 100644 --- a/python/ModelTools.py +++ b/python/ModelTools.py @@ -823,7 +823,7 @@ def doFillNuisPdfsAndSets(self): self.out.defineSet("globalObservables", gobsVars) else: # doesn't work for too many nuisances :-( # avoid duplicating _Pdf in list - setNuisPdf = list(dict.fromkeys(keywords([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs]))) + setNuisPdf = list(dict.fromkeys([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])) self.doSet("nuisances", ",".join(["%s" % self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])) self.doObj("nuisancePdf", "PROD", ",".join(["%s_Pdf" % n for n in setNuisPdf])) self.doSet("globalObservables", ",".join(self.globalobs)) diff --git a/python/NuisanceModifier.py b/python/NuisanceModifier.py index 02ea5a43b57..d5eaf885a1f 100644 --- a/python/NuisanceModifier.py +++ b/python/NuisanceModifier.py @@ -6,6 +6,7 @@ from collections import OrderedDict + def appendMap(tmap, k, thing): if k in list(tmap.keys()): if thing not in tmap[k]: