From 94962d796f6e65313f109b4cd0a359be5a0397ea Mon Sep 17 00:00:00 2001 From: Andrew42 Date: Mon, 19 Dec 2022 11:05:33 -0600 Subject: [PATCH] rework the way decorrelating shape systematics is done and include ISR decorrelations --- topcoffea/modules/datacard_tools.py | 77 ++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/topcoffea/modules/datacard_tools.py b/topcoffea/modules/datacard_tools.py index 5d3073ff3..6b6fd2503 100644 --- a/topcoffea/modules/datacard_tools.py +++ b/topcoffea/modules/datacard_tools.py @@ -13,6 +13,7 @@ from coffea.hist import StringBin, Cat, Bin from topcoffea.modules.paths import topcoffea_path +from topcoffea.modules.utils import regex_match import topcoffea.modules.eft_helper as efth PRECISION = 6 # Decimal point precision in the text datacard output @@ -382,6 +383,44 @@ def __init__(self,pkl_path,**kwargs): # "FFcloseMu": {"2016": ["2016APV","2017","2018"], "2016APV": ["2016","2017","2018"], "2017": ["2016","2016APV","2018"], "2018": ["2016","2016APV","2017"]}, } + # Defines which systematics should be decorrelated in the self.analyze() step. Each key + # should match (exactly) a particular systematic. The list for each systematic specifies + # which processes should remain correlated or not. + # Note: A given process should appear AT MOST once in the "matches" list for a given systematic + # grouping. If a process has an associated systematic, but doesn't match any of the + # groups, then it will retain its original systematic name (i.e. all unmatched + # processes will remain correlated). + # Note: For the special case where the group name is an empty string the systematic will + # instead have the matched process' name appended to it, meaning that all matched + # processes will be decorrelated! + # Note: Since the decorrelation happens during the self.analyze() step, the matched names + # should correspond to the renamed/re-grouped processes, e.g. 
use "Diboson" instead of + # "ZZ","WZ","WW". + self.syst_shape_decorrelate = { + "ISR": [ + { + "matches": ["ttH","ttll","tttt","convs"], + "group": "gg", + }, + { + "matches": ["ttlnu","tllq","Diboson","Triboson"], + "group": "qq", + }, + { + "matches": ["tHq"], + "group": "qg" + } + ], + "renorm": [{ + "matches": [".*"], + "group": "", + }], + "fact": [{ + "matches": [".*"], + "group": "", + }] + } + if extra_ignore: print(f"Adding processes to ignore: {extra_ignore}") self.ignore.extend(extra_ignore) @@ -852,20 +891,36 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins): hist_name = f"{proc_name}_{syst}" # Systematics in the text datacard don't have the Up/Down postfix syst_base = syst.replace("Up","").replace("Down","") - if syst_base in ["renorm","fact"]: # Note: Requires exact matches - # We want to split the renorm and fact systematics to be uncorrelated - # between processes, so we modify the systematic name to make combine - # treat them as separate systematics. Also, we use 'p' instead of - # 'proc_name' for renaming since we want the decomposed EFT terms - # for a particular process to share the same nuisance parameter - # TODO: We should move the hardcoded list in the if statement somewhere - # else to make it less buried in the weeds - split_syst = f"{syst_base}_{p}" + if syst_base in self.syst_shape_decorrelate: + # We want to split this systematic to be uncorrelated between certain + # processes, so we modify the systematic name to make combine treat + # them as separate systematics. 
Also, we use 'p' instead of 'proc_name' + # for renaming since we want the decomposed EFT terms for a particular + # process to share the same nuisance parameter + matched = [] + for r in self.syst_shape_decorrelate[syst_base]: + if regex_match([p],r["matches"]): + # The matched process should have this systematic put into a new group + matched.append(r["group"]) + if len(matched) == 0: + # No matches found, so keep the original systematic name + split_syst = syst_base + elif len(matched) == 1: + # Found a match, so decorrelate the process from non-matched processes + group = matched[0] + split_syst = f"{syst_base}_{group}" + if group == "": + # In the special case that the group is an empty string, + # decorrelate ALL matched processes + split_syst = f"{syst_base}_{p}" + else: + # We shouldn't have more than one match for a given systematic + raise RuntimeError(f"Unable to decorrelate shape systematic {syst_base} for {p}. Multiple group matches found: {matched}") hist_name = hist_name.replace(syst_base,split_syst) all_shapes.add(split_syst) text_card_info[proc_name]["shapes"].add(split_syst) - if self.verbose: - print(f"\t {hist_name}: Splitting {syst_base} --> {split_syst}") + if base == "sm" and self.verbose: + print(f"\tDecorrelate {p} for {syst_base} into {split_syst} ({syst.replace(syst_base,'')})") else: all_shapes.add(syst_base) text_card_info[proc_name]["shapes"].add(syst_base)