diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 646ec6005bf..81a571f14a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,3 +65,9 @@ jobs: run: | cd HiggsAnalysis/CombinedLimit make CONDA=1 -j 2 + - name: Run tests + if: startsWith(matrix.python, '3.') + shell: bash -l {0} + run: | + cd HiggsAnalysis/CombinedLimit/test + python test_interference.py diff --git a/Makefile b/Makefile index 06e935731e5..3f077a271e6 100644 --- a/Makefile +++ b/Makefile @@ -53,11 +53,12 @@ DICTNAME=$(LIBNAME)_xr # Linker and flags ------------------------------------------------------------- LD = $(shell root-config --ld) ROOTLDFLAGS = $(shell root-config --ldflags) +ROOTLIBDIR = $(shell root-config --libdir) # OS x specific linkage DARWIN := $(shell uname|grep Darwin) ifdef DARWIN -LDFLAGS = $(ROOTLDFLAGS) -shared -install_name @rpath/$(SONAME) -fPIC -EXELDFLAGS = -Wl,-rpath,'@executable_path/../lib' +LDFLAGS = $(ROOTLDFLAGS) -g -shared -install_name @rpath/$(SONAME) -fPIC +EXELDFLAGS = -Wl,-rpath,'@executable_path/../lib' -Wl,-rpath,$(ROOTLIBDIR) else LDFLAGS = $(ROOTLDFLAGS) -shared -Wl,-soname,$(SONAME) -Wl,-E -Wl,-z,defs -fPIC EXELDFLAGS = diff --git a/docs/part2/physicsmodels.md b/docs/part2/physicsmodels.md index e5822449f54..a00cf0249d8 100644 --- a/docs/part2/physicsmodels.md +++ b/docs/part2/physicsmodels.md @@ -213,19 +213,19 @@ The `PhysicsModel` that encodes the signal model above is the [twoHypothesisHigg The two processes (S and S_ALT) will get different scaling parameters. The LEP-style likelihood for hypothesis testing can now be performed by setting **x** or **not_x** to 1 and 0 and comparing two likelihood evaluations. -### Interference +### Signal-background interference -Since there are no such things as negative probability distribution functions, the recommended way to implement this is to start from the expression for the individual amplitudes and the parameter of interest $k$, +Since there are no such things as negative probability distribution functions, the recommended way to implement this is to start from the expression for the individual amplitudes $A$ and the parameter of interest $k$, $$ -\mathrm{Yield} = (k * A_{s} + A_{b})^2 -= k^2 * A_{s}^2 + k * 2 A_{s} A_{b} + A_{b}^2 +\mathrm{Yield} = |k * A_{s} + A_{b}|^2 += k^2 * |A_{s}|^2 + k * 2 \Re(A_{s}^* A_{b}) + |A_{b}|^2 = \mu * S + \sqrt{\mu} * I + B $$ where -$\mu = k^2, ~S = A_{s}^2,~B = Ab^2$ and $ S+B+I = (As + Ab)^2$. +$\mu = k^2, ~S = |A_{s}|^2,~B = |A_b|^2$ and $S+B+I = |A_s + A_b|^2$. With some algebra you can work out that, @@ -244,3 +244,113 @@ An example of this scheme is implemented in a [HiggsWidth](https://gitlab.cern.c self.modelBuilder.factory_( "expr::qqH_b_func(\"1-sqrt(@0)\", CMS_zz4l_mu)") self.modelBuilder.factory_( "expr::qqH_sbi_func(\"sqrt(@0)\", CMS_zz4l_mu)") ``` + +### Multi-process interference + +The above formulation can be extended to multiple parameters of interest +(POIs). See +[AnalyticAnomalousCoupling](https://github.com/amassiro/AnalyticAnomalousCoupling) +for an example. However, the computational performance scales quadratically +with the number of POIs, and can get extremely expensive for 10 or more, as may +be encountered often with EFT analyses. To alleviate this issue, an accelerated +interference modeling technique is implemented for template-based analyses via +the `interferenceModel` physics model. In this model, each bin yield $y$ is parameterized +$$y(\theta) = y_0 (\theta^\top M \theta)$$ +as a function of the POI vector $\theta$, a nominal template $y_0$, and a scaling matrix $M$. +To see how this parameterization relates to that of the previous section, we can define: + +$$ +y_0 = A_b^2, \qquad +M = \frac{1}{A_b^2} \begin{bmatrix} + |A_s|^2 & \Re(A_s^* A_b) \\ + \Re(A_s A_b^*) & |A_b|^2 + \end{bmatrix}, \qquad \theta = \begin{bmatrix} + \sqrt{\mu} \\ + 1 + \end{bmatrix} +$$ + +which leads to the same parameterization. At present, this technique only works with +`CMSHistFunc`-based workspaces, as these are the most common workspace types +encountered and the default when using +[autoMCStats](https://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part2/bin-wise-stats). +To use this model, for each bin find $y_0$ and put it into the datacard as a signal process, then find $M$ and +save the lower triangular component as an array in a `scaling.json` file with a +syntax as follows: + +```json +[ + { + "channel": "my_channel", + "process": "my_nominal_process", + "parameters": ["sqrt_mu[1,0,2]", "Bscaling[1]"], + "scaling": [ + [0.5, 0.1, 1.0], + [0.6, 0.2, 1.0], + [0.7, 0.3, 1.0] + ] + } +] +``` + +where the parameters are declared using RooFit's [factory +syntax](https://root.cern.ch/doc/v622/classRooWorkspace.html#a0ddded1d65f5c6c4732a7a3daa8d16b0) +and each row of the `scaling` field represents the scaling information of a bin, e.g. if $y_0 = |A_b|^2$ +then each row would contain three entries: +$$|A_s|^2 / |A_b|^2,\quad \Re(A_s^* A_b)/|A_b|^2,\quad 1$$ + +For several coefficients, one would enumerate as follows: +```python +scaling = [] +for ibin in range(nbins): + binscaling = [] + for icoef in range(ncoef): + for jcoef in range(icoef + 1): + binscaling.append(amplitude_squared_for(ibin, icoef, jcoef)) + scaling.append(binscaling) +``` + +Then, to construct the workspace, run + +```bash +text2workspace.py card.txt -P HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel \ + --PO verbose --PO scalingData=scaling.json +``` +For large amounts of scaling data, you can optionally use gzipped json (`.json.gz`) or pickle (`.pkl.gz`) +files with 2D numpy arrays for the scaling coefficients instead of lists. The function `numpy.tril_indices(ncoef)` +is helpful for extracting the lower triangle of a square matrix. + +You could pick any +nominal template, and adjust the scaling as appropriate. Generally it is +advisable to use a nominal template corresponding to near where you expect the +POIs to land so that the shape systematic effects are well-modeled in that +region. + +It may be the case that the relative contributions of the terms are themselves +a function of the POIs. For example, in VBF di-Higgs production, BSM +modifications to the production rate can be parameterized in the "kappa" +framework via three diagrams, with scaling coefficients $\kappa_V \kappa_\lambda$, +$\kappa_V^2$, and $\kappa_{2V}$, respectively, that interfere. In +that case, you can declare formulas with the factory syntax to represent each +amplitude as follows: + +```json +[ + { + "channel": "a_vbf_channel", + "process": "VBFHH", + "parameters": ["expr::a0('@0*@1', kv[1,0,2], kl[1,0,2])", "expr::a1('@0*@0', kv[1,0,2])", "k2v[1,0,2]"], + "scaling": [ + [3.30353674666415, -8.54170982038222, 22.96464188467882, 4.2353483207128, -11.07996258835088, 5.504469544697623], + [2.20644332142891, -7.076836641962523, 23.50989689214267, 4.053185685866683, -13.08569222837996, 7.502346155380032] + ] + } +] +``` + +However, you will need to manually specify what the POIs should be when creating the workspace using the `POIs=` physics option, e.g. + +```bash +text2workspace.py card.txt -P HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel \ + --PO scalingData=scaling.json --PO 'POIs=kl[1,0,2]:kv[1,0,2]:k2v[1,0,2]' +``` diff --git a/interface/CMSExternalMorph.h b/interface/CMSExternalMorph.h new file mode 100644 index 00000000000..8889a6e792c --- /dev/null +++ b/interface/CMSExternalMorph.h @@ -0,0 +1,45 @@ +#ifndef CMSExternalMorph_h +#define CMSExternalMorph_h +#include + +#include "RooAbsReal.h" +#include "RooRealVar.h" +#include "RooRealProxy.h" + +class CMSExternalMorph : public RooAbsReal { + public: + CMSExternalMorph(); + /* + * All subclasses need to provide an edges array of length nbins+1 + * of the observable (x) + * TODO: CMSHistFunc and CMSHistSum do not check the binning is compatible + * with their binning other than having the correct length + */ + CMSExternalMorph( + const char* name, + const char* title, + RooRealVar& x, + const std::vector& edges + ); + CMSExternalMorph(CMSExternalMorph const& other, const char* name = 0); + virtual ~CMSExternalMorph(); + + /* Batch accessor for CMSHistFunc / CMSHistSum, to be overriden by concrete + * implementations. hasChanged() should indicate whether or not + * batchGetBinValues() would return a new vector, given the state of + * any dependent variables. + */ + virtual bool hasChanged() const = 0; + virtual const std::vector& batchGetBinValues() const = 0; + + protected: + RooRealProxy x_; + std::vector edges_; + + double evaluate() const; + + private: + ClassDef(CMSExternalMorph, 1) +}; + +#endif // CMSExternalMorph_h diff --git a/interface/CMSHistFunc.h b/interface/CMSHistFunc.h index 53c09837458..28d0b5c6d86 100644 --- a/interface/CMSHistFunc.h +++ b/interface/CMSHistFunc.h @@ -15,6 +15,7 @@ #include "CMSHistV.h" #include "FastTemplate_Old.h" #include "SimpleCacheSentry.h" +#include "CMSExternalMorph.h" class CMSHistFuncWrapper; @@ -85,7 +86,7 @@ class CMSHistFunc : public RooAbsReal { virtual TObject* clone(const char* newname) const { return new CMSHistFunc(*this, newname); } - virtual ~CMSHistFunc() {} + virtual ~CMSHistFunc(); void addHorizontalMorph(RooAbsReal& hvar, TVectorD hpoints); @@ -148,6 +149,7 @@ class CMSHistFunc : public RooAbsReal { friend class CMSHistV; friend class CMSHistSum; + void injectExternalMorph(CMSExternalMorph& morph); /* – RooAbsArg::setVerboseEval(Int_t level) • Level 0 – No messages @@ -190,6 +192,9 @@ class CMSHistFunc : public RooAbsReal { static bool enable_fast_vertical_; //! not to be serialized + // This is an "optional proxy", i.e. a list with either zero or one entry + RooListProxy external_morph_; + private: void initialize() const; void setGlobalCache() const; @@ -214,7 +219,7 @@ class CMSHistFunc : public RooAbsReal { void applyRebin() const; - ClassDef(CMSHistFunc, 1) + ClassDef(CMSHistFunc, 2) }; diff --git a/interface/CMSHistSum.h b/interface/CMSHistSum.h index 4fe3e375ac9..23b39c95635 100644 --- a/interface/CMSHistSum.h +++ b/interface/CMSHistSum.h @@ -73,9 +73,13 @@ class CMSHistSum : public RooAbsReal { RooArgList const& coefList() const { return coeffpars_; } // RooArgList const& funcList() const { return funcs_; } + RooAbsReal const& getXVar() const { return x_.arg(); } + static void EnableFastVertical(); friend class CMSHistV; + void injectExternalMorph(int idx, CMSExternalMorph& morph); + protected: RooRealProxy x_; @@ -128,6 +132,9 @@ class CMSHistSum : public RooAbsReal { mutable int fast_mode_; //! not to be serialized static bool enable_fast_vertical_; //! not to be serialized + RooListProxy external_morphs_; + std::vector external_morph_indices_; + inline int& morphField(int const& ip, int const& iv) { return vmorph_fields_[ip * n_morphs_ + iv]; } @@ -143,7 +150,7 @@ class CMSHistSum : public RooAbsReal { private: - ClassDef(CMSHistSum,1) + ClassDef(CMSHistSum,2) }; #endif diff --git a/interface/CMSInterferenceFunc.h b/interface/CMSInterferenceFunc.h new file mode 100644 index 00000000000..14be41fcca2 --- /dev/null +++ b/interface/CMSInterferenceFunc.h @@ -0,0 +1,55 @@ +#ifndef CMSInterferenceFunc_h +#define CMSInterferenceFunc_h +#include "RooListProxy.h" +#include "SimpleCacheSentry.h" + +#include "CMSExternalMorph.h" + +class _InterferenceEval; + +class CMSInterferenceFunc : public CMSExternalMorph { + public: + CMSInterferenceFunc(); + CMSInterferenceFunc(CMSInterferenceFunc const& other, const char* name = 0); + /* + * For a coefficients list of length n and edges array of length b+1, + * the binscaling nested vector should have b entries (for b bins) with + * each being of length n*(n+1)/2, corresponding to the lower triangular + * elements of the scaling matrix, i.e. (m_00, m_10, m_11, m_20, m_21, m_22, ...) + */ + CMSInterferenceFunc( + const char* name, + const char* title, + RooRealVar& x, + const std::vector& edges, + const RooArgList& coefficients, + const std::vector> binscaling + ); + virtual ~CMSInterferenceFunc(); + + virtual TObject* clone(const char* newname) const override { + return new CMSInterferenceFunc(*this, newname); + }; + + void printMultiline( + std::ostream& os, Int_t contents, Bool_t verbose, TString indent + ) const override; + + bool hasChanged() const override { return !sentry_.good(); }; + const std::vector& batchGetBinValues() const override; + + protected: + RooListProxy coefficients_; + std::vector> binscaling_; + + mutable SimpleCacheSentry sentry_; //! + mutable std::unique_ptr<_InterferenceEval> evaluator_; //! + + private: + void initialize() const; + void updateCache() const; + + ClassDefOverride(CMSInterferenceFunc, 1) +}; + +#endif // CMSInterferenceFunc_h diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py new file mode 100644 index 00000000000..16d6ba3bcfe --- /dev/null +++ b/python/InterferenceModels.py @@ -0,0 +1,184 @@ +import gzip +import json +import pickle +import re + +import numpy as np +import ROOT +from HiggsAnalysis.CombinedLimit.PhysicsModel import PhysicsModelBase_NiceSubclasses + + +def read_scaling(path): + if path.endswith(".json"): + with open(path) as fin: + out = json.load(fin) + elif path.endswith(".json.gz"): + with gzip.open(path, "rt") as fin: + out = json.load(fin) + elif path.endswith(".pkl.gz"): + with gzip.open(path, "rb") as fin: + out = pickle.load(fin) + else: + raise RuntimeError("Unrecognized scaling data path {path}; must be either .json, .json.gz, or .pkl.gz".format(path=path)) + # normalize + if not isinstance(out, list): + raise RuntimeError("Scaling data in invalid format: expected list") + expected_fields = {"channel", "process", "parameters", "scaling"} + for item in out: + if not isinstance(item, dict): + raise RuntimeError("Scaling data in invalid format: expected each item in list to be a dict") + missing = expected_fields - set(item) + if missing: + raise RuntimeError("Missing fields in scaling item: {missing}".format(missing=missing)) + shortname = item["channel"] + "/" + item["process"] + if not all(isinstance(par, str) for par in item["parameters"]): + raise RuntimeError("Parameters must be a list of strings in {shortname}".format(shortname=shortname)) + try: + # coerce into numpy with C-contiguous memory (needed for fast std::vector copy) + item["scaling"] = np.ascontiguousarray(item["scaling"], dtype=float) + except ValueError: + # python3: raise from ex + raise RuntimeError("Scaling data invalid: could not normalize array for {shortname}".format(shortname=shortname)) + if len(item["scaling"].shape) != 2: + raise RuntimeError("Scaling data invalid: array shape incorrect for {shortname}".format(shortname=shortname)) + npar = len(item["parameters"]) + if item["scaling"].shape[1] != npar * (npar + 1) // 2: + raise RuntimeError("Scaling data invalid: array has insufficent terms for parameters in {shortname}".format(shortname=shortname)) + return out + + +class InterferenceModel(PhysicsModelBase_NiceSubclasses): + def __init__(self): + self.verbose = False + self.scaling = None + self.scaling_map = {} + self.explict_pois = None + super(InterferenceModel, self).__init__() + + def processPhysicsOptions(self, physOptions): + processed = [] + for po in physOptions: + if po == "verbose": + self.verbose = True + processed.append(po) + elif po.startswith("scalingData="): + self.scaling = read_scaling(po[len("scalingData=") :]) + processed.append(po) + elif po.startswith("POIs="): + self.explict_pois = po[len("POIs=") :].split(":") + processed.append(po) + if self.scaling is None: + raise RuntimeError("Must specify --PO=scalingData= physics option!") + # make map for quick lookup + for item in self.scaling: + self.scaling_map[(item["channel"], item["process"])] = item + return processed + super(InterferenceModel, self).processPhysicsOptions(physOptions) + + def getPOIList(self): + poiNames = [] + if self.explict_pois: + if self.verbose: + print("Building explicitly requested POIs:") + for poi in self.explict_pois: + if self.verbose: + print(" - {poi}".format(poi=poi)) + self.modelBuilder.doVar(poi) + poiname = re.sub(r"\[.*", "", poi) + if not self.modelBuilder.out.var(poiname): + raise RuntimeError("Could not find POI {poiname} after factory call".format(poiname=poiname)) + poiNames.append(poiname) + # RooFit would also detect duplicate params with mismatched factory, but this is faster + constructed = {} + for item in self.scaling: + item["parameters_inworkspace"] = [] + for param in item["parameters"]: + if param in constructed: + rooparam = constructed[param] + else: + rooparam = self.modelBuilder.factory_(param) + if not rooparam: + raise RuntimeError("Failed to build {param}".format(param=param)) + if not self.explict_pois and isinstance(rooparam, ROOT.RooRealVar) and not rooparam.isConstant(): + if self.verbose: + print("Assuming parameter {param} is to be a POI (name: {name})".format(param=param, name=rooparam.GetName())) + poiNames.append(rooparam.GetName()) + constructed[param] = rooparam + # save for later use in making CMSInterferenceFunc + item["parameters_inworkspace"].append(rooparam) + if self.verbose: + print("All POIs: {poiNames}".format(poiNames=poiNames)) + return poiNames + + def getYieldScale(self, channel, process): + string = "%s/%s" % (channel, process) + try: + item = self.scaling_map[(channel, process)] + except KeyError: + if self.verbose: + print("Will scale {string} by 1".format(string=string)) + return 1 + print("Will scale {string} using CMSInterferenceFunc dependent on parameters {parameters}".format(string=string, parameters=item["parameters"])) + item["found"] = True + # We don't actually scale the total via this mechanism + # and we'll set up the shape effects in done() since shapes aren't available yet + return 1 + + def done(self): + for item in self.scaling_map.values(): + channel = item["channel"] + process = item["process"] + string = "%s/%s" % (channel, process) + if "found" not in item: + if self.verbose: + print("Did not find {string} in workspace, even though it is in scaling data".format(string=string)) + continue + + hfname = "shapeSig_{channel}_{process}_morph".format(channel=channel, process=process) + histfunc = self.modelBuilder.out.function(hfname) + if not histfunc: + # if there are no systematics, it ends up with a different name? + hfname = "shapeSig_{process}_{channel}_rebinPdf".format(channel=channel, process=process) + histfunc = self.modelBuilder.out.function(hfname) + if not histfunc: + # assume this is a CMSHistSum workspace + histsum_name = "prop_bin{channel}".format(channel=channel) + histfunc = self.modelBuilder.out.function(histsum_name) + # in this workspace there is no object representing the process morphing + # make one up so that funcname is still unique + hfname = "prop_bin{channel}_process{process}".format(channel=channel, process=process) + # TODO: support FastVerticalInterpHistPdf2 + if not isinstance(histfunc, (ROOT.CMSHistFunc, ROOT.CMSHistSum)): + raise RuntimeError( + "Could not locate the CMSHistFunc or CMSHistSum for {string}.\n".format(string=string) + + "Note that CMSInterferenceFunc currently only supports workspaces that use these classes" + ) + + funcname = hfname + "_externalMorph" + + tpl = histfunc.cache() + edges = [tpl.GetEdge(i) for i in range(tpl.size() + 1)] + params = ROOT.RooArgList() + for p in item["parameters_inworkspace"]: + params.add(p) + + scaling_array = ROOT.std.vector["std::vector"]() + nbins, ncoef = item["scaling"].shape + scaling_array.reserve(nbins) + for sbin in item["scaling"]: + scaling_array.push_back(sbin) + + self.modelBuilder.out.safe_import(ROOT.CMSInterferenceFunc(funcname, "", histfunc.getXVar(), edges, params, scaling_array)) + func = self.modelBuilder.out.function(funcname) + if isinstance(histfunc, ROOT.CMSHistFunc): + histfunc.injectExternalMorph(func) + elif isinstance(histfunc, ROOT.CMSHistSum): + postfix = "bin{channel}_proc_{process}".format(channel=channel, process=process) + for idx, coef in enumerate(histfunc.coefList()): + if coef.GetName() in ("n_exp_" + postfix, "n_exp_final_" + postfix): + if self.verbose: + print("Injecting external morph for " + funcname) + histfunc.injectExternalMorph(idx, func) + break + + +interferenceModel = InterferenceModel() diff --git a/src/CMSExternalMorph.cc b/src/CMSExternalMorph.cc new file mode 100644 index 00000000000..95e7d25d10e --- /dev/null +++ b/src/CMSExternalMorph.cc @@ -0,0 +1,33 @@ +#include "../interface/CMSExternalMorph.h" + +CMSExternalMorph::CMSExternalMorph() {} + +CMSExternalMorph::CMSExternalMorph( + const char* name, + const char* title, + RooRealVar& x, + const std::vector& edges + ) : + RooAbsReal(name, title), + x_("x", "", this, x), + edges_(edges) +{ +} + +CMSExternalMorph::CMSExternalMorph(CMSExternalMorph const& other, const char* name) : + RooAbsReal(other, name), + x_("x", this, other.x_), + edges_(other.edges_) +{ +} + +CMSExternalMorph::~CMSExternalMorph() = default; + +double CMSExternalMorph::evaluate() const { + auto it = std::upper_bound(std::begin(edges_), std::end(edges_), x_->getVal()); + if ( (it == std::begin(edges_)) or (it == std::end(edges_)) ) { + return 0.0; + } + size_t idx = std::distance(std::begin(edges_), it) - 1; + return batchGetBinValues()[idx]; +} diff --git a/src/CMSHistFunc.cc b/src/CMSHistFunc.cc index 6c7463d7943..045f802e278 100644 --- a/src/CMSHistFunc.cc +++ b/src/CMSHistFunc.cc @@ -48,7 +48,8 @@ CMSHistFunc::CMSHistFunc(const char* name, const char* title, RooRealVar& x, vtype_(VerticalSetting::QuadLinear), divide_by_width_(divide_by_width), vsmooth_par_(1.0), - fast_vertical_(false) { + fast_vertical_(false), + external_morph_("external_morph", "", this) { if (divide_by_width_) { for (unsigned i = 0; i < cache_.size(); ++i) { cache_[i] /= cache_.GetWidth(i); @@ -120,16 +121,25 @@ CMSHistFunc::CMSHistFunc(CMSHistFunc const& other, const char* name) vtype_(other.vtype_), divide_by_width_(other.divide_by_width_), vsmooth_par_(other.vsmooth_par_), - fast_vertical_(false) { + fast_vertical_(false), + external_morph_("external_morph", this, other.external_morph_) +{ // initialize(); } +CMSHistFunc::~CMSHistFunc() = default; + void CMSHistFunc::initialize() const { if (initialized_) return; vmorph_sentry_.SetName(TString(this->GetName()) + "_vmorph_sentry"); hmorph_sentry_.SetName(TString(this->GetName()) + "_hmorph_sentry"); hmorph_sentry_.addVars(hmorphs_); vmorph_sentry_.addVars(vmorphs_); + if ( external_morph_.getSize() ) { + RooArgSet* deps = external_morph_.at(0)->getParameters({*x_}); + vmorph_sentry_.addVars(*deps); + delete deps; + } hmorph_sentry_.setValueDirty(); vmorph_sentry_.setValueDirty(); vmorphs_vec_.resize(vmorphs_.getSize()); @@ -313,7 +323,8 @@ void CMSHistFunc::updateCache() const { if (mcache_.size() == 0) mcache_.resize(storage_.size()); } - if (step1) { + bool external_morph_updated = (external_morph_.getSize() && static_cast(external_morph_.at(0))->hasChanged()); + if (step1 || external_morph_updated) { fast_vertical_ = false; } @@ -551,6 +562,16 @@ void CMSHistFunc::updateCache() const { } if (!fast_vertical_) { mcache_[idx].step2 = mcache_[idx].step1; + if ( external_morph_.getSize() ) { +#if HFVERBOSE > 0 + std::cout << "Template before external morph update:" << mcache_[idx].step2.Integral() << "\n"; + mcache_[idx].step2.Dump(); +#endif + auto& extdata = static_cast(external_morph_.at(0))->batchGetBinValues(); + for(size_t i=0; igetParameters({*x_}); + sentry_.addVars(*deps); + delete deps; + } + sentry_.setValueDirty(); binsentry_.setValueDirty(); @@ -250,6 +260,16 @@ void CMSHistSum::initialize() const { } void CMSHistSum::updateMorphs() const { + // set up pointers ahead of time for quick loop + std::vector process_morphs(compcache_.size(), nullptr); + // if any external morphs are dirty, disable fast_mode_ + for(size_t i=0; i < external_morph_indices_.size(); ++i) { + auto* morph = static_cast(external_morphs_.at(i)); + process_morphs[external_morph_indices_[i]] = morph; + if (morph->hasChanged()) { + fast_mode_ = 0; + } + } // If we're not in fast mode, need to reset all the compcache_ #if HFVERBOSE > 0 std::cout << "fast_mode_ = " << fast_mode_ << std::endl; @@ -257,6 +277,12 @@ void CMSHistSum::updateMorphs() const { for (unsigned ip = 0; ip < compcache_.size(); ++ip) { if (fast_mode_ == 0) { compcache_[ip].CopyValues(storage_[process_fields_[ip]]); + if ( process_morphs[ip] != nullptr ) { + auto& extdata = process_morphs[ip]->batchGetBinValues(); + for(size_t ibin=0; ibin= coeffpars_.getSize() ) { + throw std::runtime_error("Process index larger than number of processes in CMSHistSum"); + } + if ( morph.batchGetBinValues().size() != cache_.size() ) { + throw std::runtime_error("Mismatched binning between external morph and CMSHistSum"); + // equal edges are user responsibility for now + } + + for (auto other_idx : external_morph_indices_) { + if ( idx == other_idx ) { + external_morphs_.replace(external_morphs_[idx], morph); + return; + } + } + external_morph_indices_.push_back(idx); + external_morphs_.add(morph); +} + #undef HFVERBOSE diff --git a/src/CMSInterferenceFunc.cxx b/src/CMSInterferenceFunc.cxx new file mode 100644 index 00000000000..e0ecc8e056f --- /dev/null +++ b/src/CMSInterferenceFunc.cxx @@ -0,0 +1,128 @@ +#include "../interface/CMSInterferenceFunc.h" +#include "TBuffer.h" +#include + +class _InterferenceEval { + public: + _InterferenceEval(std::vector> scaling_in, size_t ncoef) { + binscaling_.reserve(scaling_in.size()); + for(const auto& bin : scaling_in) { + Eigen::MatrixXd mat(ncoef, ncoef); + size_t k=0; + for(size_t i=0; i& getValues() const { return values_; }; + + private: + std::vector binscaling_; + Eigen::VectorXd coefficients_; + std::vector values_; +}; + + +CMSInterferenceFunc::CMSInterferenceFunc() {}; + +CMSInterferenceFunc::CMSInterferenceFunc( + CMSInterferenceFunc const& other, const char* name + ) : + CMSExternalMorph(other, name), + coefficients_("coefficients", this, other.coefficients_), + binscaling_(other.binscaling_), + sentry_(name ? TString(name) + "_sentry" : TString(other.GetName())+"_sentry", "") +{ +} + +CMSInterferenceFunc::CMSInterferenceFunc( + const char* name, + const char* title, + RooRealVar& x, + const std::vector& edges, + RooArgList const& coefficients, + const std::vector> binscaling + ) : + CMSExternalMorph(name, title, x, edges), + coefficients_("coefficients", "", this), + binscaling_(binscaling), + sentry_(TString(name) + "_sentry", "") +{ + coefficients_.add(coefficients); +} + +CMSInterferenceFunc::~CMSInterferenceFunc() = default; + +void CMSInterferenceFunc::printMultiline( + std::ostream& os, Int_t contents, Bool_t verbose, TString indent + ) const { + RooAbsReal::printMultiline(os, contents, verbose, indent); + os << ">> Sentry: " << (sentry_.good() ? "clean" : "dirty") << "\n"; + sentry_.Print("v"); +} + +void CMSInterferenceFunc::initialize() const { + // take the opportunity to validate persistent data + size_t nbins = edges_.size() - 1; + size_t ncoef = coefficients_.getSize(); + + for (size_t i=0; i < ncoef; ++i) { + if ( coefficients_.at(i) == nullptr ) { + throw std::invalid_argument("Lost coefficient " + std::to_string(i)); + } + if ( not coefficients_.at(i)->InheritsFrom("RooAbsReal") ) { + throw std::invalid_argument( + "Coefficient " + std::to_string(i) + " is not a RooAbsReal" + ); + } + } + if ( binscaling_.size() != nbins ) { + throw std::invalid_argument( + "Number of bins as determined from bin edges (" + + std::to_string(nbins) + ") does not match bin" + " scaling array (" + std::to_string(binscaling_.size()) + ")" + ); + } + for (size_t i=0; i < nbins; ++i) { + if ( binscaling_[i].size() != ncoef*(ncoef+1)/2 ) { + throw std::invalid_argument( + "Length of bin scaling matrix lower triangle for bin " + std::to_string(i) + + " (" + std::to_string(binscaling_[i].size() ) + ") is not consistent" + + " with the length of the coefficient array (" + std::to_string(ncoef) + ")" + ); + } + } + + sentry_.SetName(TString(GetName()) + "_sentry"); + sentry_.addVars(coefficients_); + sentry_.setValueDirty(); + evaluator_ = std::make_unique<_InterferenceEval>(binscaling_, ncoef); +} + +void CMSInterferenceFunc::updateCache() const { + for (int i=0; i < coefficients_.getSize(); ++i) { + auto* coef = static_cast(coefficients_.at(i)); + if ( coef == nullptr ) throw std::runtime_error("Lost coef!"); + evaluator_->setCoefficient(i, coef->getVal()); + } + evaluator_->computeValues(); + sentry_.reset(); +} + +const std::vector& CMSInterferenceFunc::batchGetBinValues() const { + if ( not evaluator_ ) initialize(); + if ( not sentry_.good() ) updateCache(); + return evaluator_->getValues(); +} + diff --git a/src/RobustHesse.cc b/src/RobustHesse.cc index 41371dc7b50..136682e83c5 100644 --- a/src/RobustHesse.cc +++ b/src/RobustHesse.cc @@ -101,6 +101,13 @@ int RobustHesse::setParameterStencil(unsigned i) { double boundLo = rrv->getMin(); double boundHi = rrv->getMax(); + // If we skip initial fit (e.g. to compute from asimov point) + // need a guess for the initial step size + if ( rrv->getError() == 0.0 ) { + valLo = x + 1e-3*(boundLo - x); + valHi = x + 1e-3*(boundHi - x); + } + bool closeToLo = valLo < boundLo; bool closeToHi = valHi > boundHi; diff --git a/src/classes.h b/src/classes.h index 9374e48d881..867ffeee1c9 100644 --- a/src/classes.h +++ b/src/classes.h @@ -66,4 +66,6 @@ #include "HiggsAnalysis/CombinedLimit/interface/RooCheapProduct.h" #include "HiggsAnalysis/CombinedLimit/interface/CMSHggFormula.h" #include "HiggsAnalysis/CombinedLimit/interface/SimpleProdPdf.h" +#include "HiggsAnalysis/CombinedLimit/interface/CMSExternalMorph.h" +#include "HiggsAnalysis/CombinedLimit/interface/CMSInterferenceFunc.h" #include "HiggsAnalysis/CombinedLimit/interface/RooEFTScalingFunction.h" diff --git a/src/classes_def.xml b/src/classes_def.xml index 2c891d14a4f..3bf3eb4f689 100644 --- a/src/classes_def.xml +++ b/src/classes_def.xml @@ -222,5 +222,7 @@ + + diff --git a/test/test_interference.py b/test/test_interference.py new file mode 100755 index 00000000000..a97e2414093 --- /dev/null +++ b/test/test_interference.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +import ROOT +import numpy as np +import subprocess +import json + + +def array2vector_2d(array): + assert len(array.shape) == 2 + out = ROOT.std.vector["std::vector"]() + out.reserve(len(array)) + for row in array: + out.push_back(row) + return out + + +def to_TH1(name, array): + h = ROOT.TH1D(name, name, 20, 250, 1200) + for i, val in enumerate(array): + h.SetBinContent(i + 1, val) + return h + + +# make some shapes +fout = ROOT.TFile("shapes.root", "recreate") +nom = 100 * np.array( + [ + 0.14253603, + 0.21781641, + 0.22698837, + 0.19603483, + 0.19259561, + 0.15552859, + 0.13909682, + 0.09438712, + 0.08521593, + 0.06878416, + 0.06419854, + 0.04318116, + 0.04776676, + 0.03057073, + 0.02866007, + 0.02292805, + 0.02139951, + 0.02063524, + 0.01222829, + 0.01337466, + ] +) +obs = nom.copy() + +hnom = to_TH1("VBFHH", nom) +for i, val in enumerate(nom): + hnom.SetBinError(i + 1, np.sqrt(val) * 0.1) +hnom.Write() +to_TH1("VBFHH_jesUp", nom * np.linspace(0.95, 1.05, 20)).Write() +to_TH1("VBFHH_jesDown", nom / np.linspace(0.95, 1.05, 20)).Write() + +nom = 100 * np.exp(-np.linspace(0, 1, 20)) +obs += nom +hnom = to_TH1("background", nom) +for i, val in enumerate(nom): + hnom.SetBinError(i + 1, np.sqrt(val) * 0.1) +hnom.Write() +to_TH1("background_jesUp", nom * np.linspace(0.95, 1.05, 20)).Write() +to_TH1("background_jesDown", nom / np.linspace(0.95, 1.05, 20)).Write() + +to_TH1("data_obs", np.round(obs)).Write() + +# write a card +with open("card.txt", "w") as fout: + fout.write( + """\ +Combination of card.txt +imax 1 number of bins +jmax 1 number of processes minus 1 +kmax 3 number of nuisance parameters +---------------------------------------------------------------------------------------------------------------------------------- +shapes * ch1 shapes.root $PROCESS $PROCESS_$SYSTEMATIC +---------------------------------------------------------------------------------------------------------------------------------- +bin ch1 +observation -1 +---------------------------------------------------------------------------------------------------------------------------------- +bin ch1 ch1 +process VBFHH background +process 0 1 +rate -1 -1 +---------------------------------------------------------------------------------------------------------------------------------- +bgnorm lnN - 1.3 +jes shape 1.0 1.0 +lumi lnN 1.02 1.02 +* autoMCStats 0 +""" + ) + +# write the scaling data +scaling = [ + { + "channel": "ch1", + "process": "VBFHH", + "parameters": [ + "expr::a0('@0*@1', kv[1,0,2], kl[1,0,2])", + "expr::a1('@0*@0', kv[1,0,2])", + "k2v[1,0,2]", + ], + "scaling": [ + [3.30353674666415, -8.54170982038222, 22.96464188467882, 4.2353483207128, -11.07996258835088, 5.504469544697623], + [2.20644332142891, -7.076836641962523, 23.50989689214267, 4.053185685866683, -13.08569222837996, 7.502346155380032], + [2.323314512827915, -9.040565356058327, 35.97836755817654, 6.135410429832603, -23.87500375686657, 16.25863529518014], + [0.5925805332888091, -3.139640204484572, 17.13589230378096, 1.976422909655008, -10.515009347222, 6.627980447033357], + [0.3505170703269003, -2.237551781735236, 14.84170437843385, 1.483246407331564, -9.493221195626012, 6.302831691298613], + [0.2334740816002986, -1.79981530542472, 14.40275907010786, 1.22573469124098, -9.473328823485497, 6.458585723630323], + [0.1959374052725543, -1.757624939190617, 16.26453309470772, 1.257071800464856, -11.27433100208976, 8.089297781650776], + [0.186504029519429, -1.822069966644771, 18.49884116334009, 1.391201452068932, -13.60944960795888, 10.39529105220993], + [0.0972664880869715, -1.169327097322687, 14.59008882076805, 0.863528399754145, -10.3792682464399, 7.682778579161866], + [0.0878055201507951, -1.156467907936071, 15.89008007570035, 0.9032722960981284, -11.89269332401088, 9.31389227584649], + [0.2006114827320551, -2.776793529232688, 39.12099794229794, 2.335437756631023, -32.32054661687562, 27.20219535392458], + [0.05179066270217598, -0.8550833654170061, 14.82948643635128, 0.6753041305320768, -11.17147343869588, 8.821228248108168], + [0.05095452842967559, -0.948210749952708, 18.25692153880631, 0.7561787601252029, -14.08060600559859, 11.23739992361621], + [0.01801729907958822, -0.4102710173962256, 9.828612567782274, 0.2950547079487041, -6.724075992501888, 4.831954737036956], + [0.02762233787081839, -0.6400852678490596, 15.46123935330864, 0.5127706114146487, -11.88156027745066, 9.528888176590677], + [0.031654990010153, -0.7790988142691099, 19.86193598270219, 0.648607157613526, -15.97020317079789, 13.30779868219462], + [0.01316620750610005, -0.3821583465978776, 11.67177892863827, 0.2914985575363581, -8.480579644763935, 6.457533731506537], + [0.02886802887767344, -0.8369930524359994, 24.91187028333814, 0.7103796160970196, -20.57801184441048, 17.4685122492831], + [0.02930989281275648, -0.9242683589392606, 29.81526833971985, 0.7918145320034853, -25.01001674094063, 21.4403629032202], + [0.0516036089235802, -1.673006024492507, 55.06880032083917, 1.526329404862879, -49.49344845658728, 45.15984622267106], + ], + }, +] + +with open("scaling.json", "w") as fout: + json.dump(scaling, fout) + +t2wcmd = [ + "text2workspace.py", + "card.txt", + "-P", + "HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel", + "--PO", + "verbose", + "--PO", + "scalingData=scaling.json", + "--PO", + "POIs=kl[1,0,2]:kv[1,0,2]:k2v[1,0,2]", +] + +ret = subprocess.call(t2wcmd) +assert ret == 0 + +histsum_args = ["--for-fits", "--no-wrappers", "--use-histsum", "-o", "card_histsum.root"] +ret = subprocess.call(t2wcmd + histsum_args) +assert ret == 0 + +fws = ROOT.TFile.Open("card.root") +w = fws.Get("w") + + +def setvars(x, kl, kv, k2v): + w.var("CMS_th1x").setVal(x) + w.var("kl").setVal(kl) + w.var("kv").setVal(kv) + w.var("k2v").setVal(k2v) + + +func = w.function("shapeSig_ch1_VBFHH_morph_externalMorph") +assert func + +setvars(0, 1, 1, 1) +assert abs(func.getVal() - 1.0) < 1e-14, func.getVal() +setvars(1, 1.1, 1, 1) +assert abs(func.getVal() - 0.8586229062809139) < 1e-14, func.getVal() +setvars(2, 1.1, 0.9, 1.3) +assert abs(func.getVal() - 4.372110974178483) < 1e-14, func.getVal() + +# toy generation is different between the histsum and histfunc models, somehow +ntoys = 10 +ret = subprocess.call("combine -M GenerateOnly card.root -t {ntoys} --saveToys".format(ntoys=ntoys).split(" ")) +assert ret == 0 + +ret = subprocess.call( + "combine -M MultiDimFit card.root -t {ntoys} --toysFile higgsCombineTest.GenerateOnly.mH120.123456.root -n HistFunc".format(ntoys=ntoys).split(" ") +) +assert ret == 0 + +ret = subprocess.call( + "combine -M MultiDimFit card_histsum.root -t {ntoys} --toysFile higgsCombineTest.GenerateOnly.mH120.123456.root -n HistSum".format(ntoys=ntoys).split(" ") +) +assert ret == 0 + +f_histfunc = ROOT.TFile.Open("higgsCombineHistFunc.MultiDimFit.mH120.123456.root") +f_histsum = ROOT.TFile.Open("higgsCombineHistSum.MultiDimFit.mH120.123456.root") + +ndiff = {"kl": 0, "kv": 0, "k2v": 0} +for row1, row2 in zip(f_histfunc.Get("limit"), f_histsum.Get("limit")): + if abs(row1.kl - row2.kl) > 1e-4: + ndiff["kl"] += 1 + if abs(row1.kv - row2.kv) > 1e-4: + ndiff["kv"] += 1 + if abs(row1.k2v - row2.k2v) > 1e-4: + ndiff["k2v"] += 1 + +print("Out of {ntoys} toys, {ndiff} are not matching (tolerance: 1e-4) between CMSHistFunc and CMSHistSum".format(ntoys=ntoys, ndiff=ndiff))