Move combineTool.py and routines needed to create impact plots to combine #902

Merged · 12 commits · Jun 7, 2024
399 changes: 399 additions & 0 deletions python/tool_base/CombineToolBase.py

Large diffs are not rendered by default.

125 changes: 125 additions & 0 deletions python/tool_base/CovMatrix.py
@@ -0,0 +1,125 @@
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import print_function
import ROOT

import HiggsAnalysis.CombinedLimit.tool_base.utils as utils
from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS

from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase
from six.moves import range
import ctypes

class CovMatrix(CombineToolBase):
    description = 'Build a fit covariance matrix from scan results'
    requires_root = True

    def __init__(self):
        CombineToolBase.__init__(self)

    def attach_args(self, group):
        CombineToolBase.attach_args(self, group)
        group.add_argument('-i', '--input', nargs='+', default=[],
                           help='The input file containing the MultiDimFit singles mode output')
        group.add_argument(
            '-o', '--output', help='The output name in the format file:prefix')
        group.add_argument(
            '-P', '--POIs', help='The params that were scanned (in scan order)')
        group.add_argument(
            '--POIs-from-set', help='Extract from file:workspace:set instead')
        group.add_argument('--compare', help='Compare to RooFitResult')

    def run_method(self):
        POIs = []
        if self.args.POIs is not None:
            POIs = self.args.POIs.split(',')
        if self.args.POIs_from_set is not None:
            ws_in = self.args.POIs_from_set.split(':')
            print(ws_in)
            POIs = utils.list_from_workspace(ws_in[0], ws_in[1], ws_in[2])
        # res = { }
        # if len(args.input) == 1:
        #     res.update(get_singles_results(args.input, POIs, POIs))
        # elif len(args.input) > 1:
        #     assert len(args.input) == len(POIs)
        #     for i in range(len(POIs)):
        #         res.update(get_singles_results(args.input[i], [POIs[i]], POIs))
        # for p in POIs:
        #     val = res[p][p]
        #     print '%s = %.3f -%.3f/+%.3f' % (p, val[1], val[1] - val[0], val[2] - val[1])
        # print res
        # cor = ROOT.TMatrixDSym(len(POIs))
        # cov = ROOT.TMatrixDSym(len(POIs))
        # for i, p in enumerate(POIs):
        #     cor[i][i] = ctypes.c_double(1.)  # diagonal correlation is 1
        #     cov[i][i] = ctypes.c_double(pow((res[p][p][2] - res[p][p][0]) / 2., 2.))  # symmetrized error
        # for i, ip in enumerate(POIs):
        #     for j, jp in enumerate(POIs):
        #         if i == j: continue
        #         val_i = ((res[ip][jp][2] - res[ip][jp][0]) / 2.) / math.sqrt(cov[j][j])
        #         val_j = ((res[jp][ip][2] - res[jp][ip][0]) / 2.) / math.sqrt(cov[i][i])
        #         correlation = (val_i + val_j) / 2.  # take the average correlation?
        #         correlation = min(val_i, val_j, key=abs)  # or the one smaller in magnitude?
        #         cor[i][j] = correlation
        #         cor[j][i] = correlation
        #         covariance = correlation * math.sqrt(cov[i][i]) * math.sqrt(cov[j][j])
        #         cov[i][j] = covariance
        #         cov[j][i] = covariance
        compare = self.args.compare is not None
        if compare:
            f_in = self.args.compare.split(':')
            f = ROOT.TFile(f_in[0])
            fitres = f.Get(f_in[1])
            fitres_cov = ROOT.TMatrixDSym(len(POIs))
            fitres_cov_src = fitres.covarianceMatrix()
            fitres_cor = ROOT.TMatrixDSym(len(POIs))
            fitres_cor_src = fitres.correlationMatrix()
            ipos = []
            for p in POIs:
                ipos.append(fitres.floatParsFinal().index(p))
            for i, ip in enumerate(POIs):
                for j, jp in enumerate(POIs):
                    fitres_cor[i][j] = ctypes.c_double(
                        fitres_cor_src[ipos[i]][ipos[j]])
                    fitres_cov[i][j] = ctypes.c_double(
                        fitres_cov_src[ipos[i]][ipos[j]])
        # print 'My correlation matrix:'
        # cor.Print()
        if compare:
            print('RooFitResult correlation matrix:')
            fitres_cor.Print()
        # print 'My covariance matrix:'
        # cov.Print()
        if compare:
            print('RooFitResult covariance matrix:')
            fitres_cov.Print()
        if self.args.output is not None:
            out = self.args.output.split(':')
            fout = ROOT.TFile(out[0], 'RECREATE')
            prefix = out[1]
            # fout.WriteTObject(cor, prefix + '_cor')
            # h_cor = self.fix_TH2(ROOT.TH2D(cor), POIs)
            # fout.WriteTObject(h_cor, prefix + '_h_cor')
            # fout.WriteTObject(cov, prefix + '_cov')
            # h_cov = self.fix_TH2(ROOT.TH2D(cov), POIs)
            # fout.WriteTObject(h_cov, prefix + '_h_cov')
            if compare:
                fout.WriteTObject(fitres_cor, prefix + '_comp_cor')
                h_cor_compare = self.fix_TH2(ROOT.TH2D(fitres_cor), POIs)
                fout.WriteTObject(h_cor_compare, prefix + '_comp_h_cor')
                fout.WriteTObject(fitres_cov, prefix + '_comp_cov')
                h_cov_compare = self.fix_TH2(ROOT.TH2D(fitres_cov), POIs)
                fout.WriteTObject(h_cov_compare, prefix + '_comp_h_cov')

    def fix_TH2(self, h, labels):
        # Return a copy of h with the y-axis flipped and both axes labelled
        # by POI name, so the matrix reads top-to-bottom in POI order.
        h_fix = h.Clone()
        for y in range(1, h.GetNbinsY() + 1):
            for x in range(1, h.GetNbinsX() + 1):
                h_fix.SetBinContent(
                    x, y, h.GetBinContent(x, h.GetNbinsY() + 1 - y))
        for x in range(1, h_fix.GetNbinsX() + 1):
            h_fix.GetXaxis().SetBinLabel(x, labels[x - 1])
        for y in range(1, h_fix.GetNbinsY() + 1):
            h_fix.GetYaxis().SetBinLabel(y, labels[-y])
        return h_fix
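
As a reference for the method above: a minimal sketch (plain Python, no ROOT; the POI names are hypothetical) of the flip that fix_TH2 performs. Output row y is filled from input row NbinsY + 1 - y, so the matrix is mirrored top-to-bottom while the x-axis keeps POI order and the y-axis labels run in reverse, which is the conventional way to display a correlation matrix.

# Sketch of the fix_TH2 row flip, using nested lists in place of a TH2D.
# The 1-based loop indices mimic ROOT histogram bin numbering.
pois = ['r_ggH', 'r_qqH', 'r_VH']  # hypothetical POI names
n = len(pois)
src = [[10 * y + x for x in range(1, n + 1)] for y in range(1, n + 1)]
dst = [[src[(n + 1 - y) - 1][x - 1] for x in range(1, n + 1)]
       for y in range(1, n + 1)]
x_labels = [pois[x - 1] for x in range(1, n + 1)]  # POI order, left to right
y_labels = [pois[-y] for y in range(1, n + 1)]     # reversed POI order
print(dst)       # [[31, 32, 33], [21, 22, 23], [11, 12, 13]]
print(y_labels)  # ['r_VH', 'r_qqH', 'r_ggH']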
255 changes: 255 additions & 0 deletions python/tool_base/EnhancedCombine.py
@@ -0,0 +1,255 @@
from __future__ import absolute_import
from __future__ import print_function
import itertools
import HiggsAnalysis.CombinedLimit.tool_base.utils as utils
import json
import os
import bisect
from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS
from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase
import six
from six.moves import zip


def isfloat(value):
    try:
        float(value)
        return True
    except ValueError:
        return False


class EnhancedCombine(CombineToolBase):
    description = 'combine pass-through with special treatment for some options [DEFAULT]'
    requires_root = False

    def __init__(self):
        CombineToolBase.__init__(self)

    def attach_intercept_args(self, group):
        CombineToolBase.attach_intercept_args(self, group)
        group.add_argument(
            '-m', '--mass', help='Supports range strings for multiple masses, e.g. "120:130:5,140" will produce four combine calls with mass values of 120, 125, 130 and 140')
        group.add_argument(
            '--points', help='For use with "-M MultiDimFit --algo grid" to split scan points into separate jobs')
        group.add_argument(
            '--singlePoint', help='Supports range strings for multiple points to test, uses the same format as the --mass argument')
        group.add_argument(
            '-s', '--seed', help='Supports range strings for multiple RNG seeds, uses the same format as the --mass argument')
        group.add_argument(
            '-d', '--datacard', nargs='*', default=[], help='Operate on multiple datacards')
        group.add_argument('--name', '-n', default='.Test',
                           help='Name used to label the combine output file, can be modified by other options')
        group.add_argument(
            '--setParameterRanges', help='Some other options will modify or add to the list of parameter ranges')

    def attach_args(self, group):
        CombineToolBase.attach_args(self, group)
        group.add_argument(
            '--opts', nargs='+', default=[], help='Add preset combine option groups')
        group.add_argument(
            '--there', action='store_true', help='Run combine in the same directory as the workspace')
        group.add_argument('--split-points', type=int, default=0,
                           help='When used in conjunction with --points, will create multiple combine calls that each run at most the number of points specified here.')
        group.add_argument(
            '--boundlist', help='Name of a JSON file which contains the ranges of physical parameters, depending on the given mass and physics model')
        group.add_argument(
            '--generate', nargs='*', default=[], help='Generate sets of options')

    def set_args(self, known, unknown):
        CombineToolBase.set_args(self, known, unknown)
        if hasattr(self.args, 'opts'):
            for opt in self.args.opts:
                self.passthru.append(OPTS[opt])

    def run_method(self):
        # Put the method back in because we always take it out
        self.put_back_arg('method', '-M')

        # cmd_queue = []
        subbed_vars = {}

        # pre_cmd = ''

        if self.args.mass is not None:
            mass_vals = utils.split_vals(self.args.mass)
            subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals]
            self.passthru.extend(['-m', '%(MASS)s'])

        if self.args.singlePoint is not None:
            single_points = utils.split_vals(self.args.singlePoint)
            subbed_vars[('SINGLEPOINT',)] = [(pval,) for pval in single_points]
            self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
            self.args.name += '.POINT.%(SINGLEPOINT)s'

        if self.args.seed is not None:
            seed_vals = utils.split_vals(self.args.seed)
            subbed_vars[('SEED',)] = [(sval,) for sval in seed_vals]
            self.passthru.extend(['-s', '%(SEED)s'])

        for i, generate in enumerate(self.args.generate):
            # Headers and content are separated by a doubled delimiter
            # ('::' or ';;'); individual items by the single delimiter
            split_char = ':' if '::' in generate else ';'
            gen_header, gen_content = generate.split(split_char * 2)
            print(gen_header)
            print(gen_content)
            gen_headers = gen_header.split(split_char)
            gen_entries = gen_content.split(split_char)
            key = tuple()
            arglist = []
            for header in gen_headers:
                if header == 'n' or header == 'name':
                    self.args.name += '.%(GENNAME' + str(i) + ')s'
                    key += ('GENNAME' + str(i),)
                else:
                    self.passthru.extend(['%(' + header + ')s'])
                    key += (header,)
            for entry in gen_entries:
                if ',,' in entry:
                    split_entry = entry.split(',,')
                else:
                    split_entry = entry.split(',')
                final_arg = []
                for header, e in zip(gen_headers, split_entry):
                    argname = '-%s' % header if len(header) == 1 else '--%s' % header
                    if header == 'n' or header == 'name':
                        final_arg.append(e)
                    elif len(e) and e != '!':
                        final_arg.append('%s %s' % (argname, e))
                    else:
                        final_arg.append('')
                arglist.append(tuple(final_arg))
            subbed_vars[key] = arglist

        if len(self.args.datacard) >= 1:
            # Two lists of tuples: one that specifies the mass and one that
            # doesn't
            dc_mass = []
            dc_no_mass = []
            for dc in self.args.datacard:
                # Split workspace into path and filename
                path, file = os.path.split(dc)
                # If the wsp is in the current directory we should call it '.'
                if path == '':
                    path = '.'
                # If we're not using the --there option then leave the
                # workspace argument as the full path
                if not self.args.there:
                    file = dc
                # Figure out if the enclosing directory is a mass value
                dirs = path.split('/')
                if self.args.mass is None and len(dirs) >= 1 and isfloat(dirs[-1]):
                    print('Assuming card %s uses mass value %s' % (dc, dirs[-1]))
                    dc_mass.append((path, file, dirs[-1]))
                dc_no_mass.append((path, file))
            # If at least one mass value was inferred, assume all the cards
            # are laid out like this
            if len(dc_mass) > 0:
                subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass
                self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s'])
            else:
                subbed_vars[('DIR', 'DATACARD',)] = dc_no_mass
                self.passthru.extend(['-d', '%(DATACARD)s'])
        # elif len(self.args.datacard) == 1:
        #     self.passthru.extend(['-d', self.args.datacard[0]])

        current_ranges = self.args.setParameterRanges
        put_back_ranges = current_ranges is not None

        if self.args.boundlist is not None:
            # We definitely don't need to put the parameter ranges back
            # into the args because they're going in via the boundlist
            # option instead
            put_back_ranges = False
            with open(self.args.boundlist) as json_file:
                bnd = json.load(json_file)
            bound_pars = list(bnd.keys())
            print('Found bounds for parameters %s' % ','.join(bound_pars))
            # Fill a dictionary of the bound info of the form:
            #   {'PAR1': [(MASS, LOWER, UPPER), ...], ...}
            bound_vals = {}
            for par in bound_pars:
                bound_vals[par] = list()
                for mass, bounds in six.iteritems(bnd[par]):
                    bound_vals[par].append((float(mass), bounds[0], bounds[1]))
                bound_vals[par].sort(key=lambda x: x[0])
            # Find the subbed_vars entry containing the mass.
            # We will extend it to also specify the ranges
            dict_key = None
            mass_idx = None
            for key in subbed_vars.keys():
                if 'MASS' in key:
                    dict_key = key
                    mass_idx = dict_key.index('MASS')
            new_key = dict_key + ('MODELBOUND',)
            new_list = []
            for entry in subbed_vars[dict_key]:
                command = []
                if current_ranges is not None:
                    command.append(current_ranges)
                mval = entry[mass_idx]
                for par in bound_pars:
                    # The (mass, None, None) is just a trick to make bisect_left do the
                    # comparison with the list of tuples in bound_vals[par]. The +1E-5
                    # avoids float rounding issues: e.g. with entries at masses 120 and
                    # 130 and mval = 125, bisect_left returns 1 and we take the bounds
                    # stored for mass 120.
                    lower_bound = bisect.bisect_left(bound_vals[par], (float(mval) + 1E-5, None, None))
                    # If lower_bound == 0 we are at or below the lowest mass point, in
                    # which case we should increase by one to take the bounds from this
                    # lowest point
                    if lower_bound == 0:
                        lower_bound += 1
                    command.append('%s=%g,%g' % (par, bound_vals[par][lower_bound - 1][1], bound_vals[par][lower_bound - 1][2]))
                new_list.append(entry + (str(':'.join(command)),))
            # Now remove the current mass information from subbed_vars
            # and replace it with the updated one
            del subbed_vars[dict_key]
            subbed_vars[new_key] = new_list
            self.passthru.extend(['--setParameterRanges', '%(MODELBOUND)s'])

        # We might need to put the intercepted --setParameterRanges arg back in
        if put_back_ranges:
            self.put_back_arg('setParameterRanges', '--setParameterRanges')

        if self.args.points is not None:
            self.passthru.extend(['--points', self.args.points])
        if (self.args.split_points is not None and
                self.args.split_points > 0 and
                self.args.points is not None):
            points = int(self.args.points)
            split = self.args.split_points
            start = 0
            ranges = []
            while (start + (split - 1)) < points:
                # filename = "higgsCombine" + self.args.name + ".POINTS." + str(start) + "." + str(start + (split - 1)) + ".MultiDimFit.mH" + str(self.args.mass) + ".root"
                # if (not os.path.isfile(filename)) or (os.path.getsize(filename) < 1024):
                #     # Send the job if the file it's supposed to create doesn't exist
                #     # yet, or if the file is empty because the previous job didn't
                #     # finish
                ranges.append((start, start + (split - 1)))
                start += split
            if start < points:
                # filename = "higgsCombine" + self.args.name + ".POINTS." + str(start) + "." + str(points - 1) + ".MultiDimFit.mH" + str(self.args.mass) + ".root"
                # if (not os.path.isfile(filename)) or (os.path.getsize(filename) < 1024):
                ranges.append((start, points - 1))
            # if ranges == []:
            #     print "No jobs were created; all files already exist"
            #     exit()
            subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
            self.passthru.extend(
                ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
            self.args.name += '.POINTS.%(P_START)s.%(P_END)s'

        # We can only put the name option back now because we might have
        # modified it from what the user specified
        self.put_back_arg('name', '-n')
        proto = 'combine ' + (' '.join(self.passthru))
        if self.args.there:
            proto = 'pushd %(DIR)s; combine ' + (' '.join(self.passthru)) + '; popd'

        # Expand the Cartesian product of all substitution lists and queue
        # one combine call per combination
        for it in itertools.product(*list(subbed_vars.values())):
            keys = list(subbed_vars.keys())
            subs = {}  # renamed from 'dict' to avoid shadowing the builtin
            for i, k in enumerate(keys):
                for tuple_i, tuple_ele in enumerate(k):
                    subs[tuple_ele] = it[i][tuple_i]
            self.job_queue.append(proto % subs)
        self.flush_queue()
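
The --mass, --singlePoint and --seed handling above relies on utils.split_vals to expand range strings into individual values. That helper lives in tool_base/utils.py and is not part of this diff, so the following is only a hedged sketch of its assumed semantics (comma-separated entries, with 'LO:HI:STEP' expanding to an inclusive range), consistent with the '120:130:5,140' example in the --mass help text:

# Hedged sketch of the range-string expansion assumed for utils.split_vals;
# the real implementation is not shown in this pull request.
def split_vals_sketch(vals):
    out = []
    for entry in vals.split(','):
        if ':' in entry:
            # 'LO:HI:STEP' expands to LO, LO+STEP, ... up to and including HI
            lo, hi, step = (float(x) for x in entry.split(':'))
            v = lo
            while v <= hi + 1e-9:  # small tolerance against float drift
                out.append('%g' % v)
                v += step
        else:
            out.append(entry)
    return out

print(split_vals_sketch('120:130:5,140'))  # ['120', '125', '130', '140']

Each expanded value becomes one entry in subbed_vars, and run_method then queues one combine call per element of the Cartesian product of all such substitution lists.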