From 991a3026fdbbc00a062ccf91b506ab2ebb518a19 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Fri, 16 Dec 2022 15:51:09 -0500 Subject: [PATCH 1/5] Update the brittle workaround to handle the dropped convs rate syst --- analysis/topEFT/make_cr_and_sr_plots.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py index a1eb580ea..56a4ed1bf 100644 --- a/analysis/topEFT/make_cr_and_sr_plots.py +++ b/analysis/topEFT/make_cr_and_sr_plots.py @@ -142,7 +142,7 @@ # Some of our processes do not have rate systs split into qcd and pdf, so we just list them under qcd # This list keeps track of those, so we can handle them when extracting the numbers from the rate syst json -PROC_WITH_JUST_QCD_RATE_SYST = ["tttt","ttll","ttlnu","Triboson","tWZ"] +PROC_WITHOUT_PDF_RATE_SYST = ["tttt","ttll","ttlnu","Triboson","tWZ","convs"] yt = YieldTools() @@ -235,7 +235,7 @@ def get_correlation_tag(uncertainty_name,proc_name,sample_group_map): if uncertainty_name in ["qcd_scale","pdf_scale"]: if proc_name_in_json is not None: if proc_name_in_json == "convs": - # Special case for conversions since we estimate from LO sample, we have _only_ pdf key not qcd + # Special case for conversions since we estimate from LO sample, we do not have qcd uncty # Would be better to handle this in a more general way corr_tag = None else: @@ -261,7 +261,7 @@ def get_rate_systs(sample_name,sample_group_map): # Get the scale uncty from the json (if there is not an uncertainty for this sample, return 1 since the uncertainties are multiplicative) if scale_name_for_json is not None: - if scale_name_for_json in PROC_WITH_JUST_QCD_RATE_SYST: + if scale_name_for_json in PROC_WITHOUT_PDF_RATE_SYST: # Special cases for when we do not have a pdf uncty (this is a really brittle workaround) # NOTE Someday should fix this, it's a really hardcoded and brittle and bad workaround pdf_uncty = [1.0,1,0] From 972875d9b93c45d531ca82ffa12c4be846c604dc Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Fri, 16 Dec 2022 19:49:44 -0500 Subject: [PATCH 2/5] Some preliminary code for renormfac decorrelation, to be continued.. --- analysis/topEFT/make_cr_and_sr_plots.py | 58 ++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py index 56a4ed1bf..252721dc8 100644 --- a/analysis/topEFT/make_cr_and_sr_plots.py +++ b/analysis/topEFT/make_cr_and_sr_plots.py @@ -321,6 +321,8 @@ def get_rate_syst_arrs(base_histo,proc_group_map): # Wrapper for getting plus and minus shape arrs def get_shape_syst_arrs(base_histo): + print("\n\nHere in get_shape_syst_arrs") + # Get the list of systematic base names (i.e. without the up and down tags) # Assumes each syst has a "systnameUp" and a "systnameDown" category on the systematic axis syst_var_lst = [] @@ -331,14 +333,63 @@ def get_shape_syst_arrs(base_histo): if syst_name_base not in syst_var_lst: syst_var_lst.append(syst_name_base) + print("syst_var_lst",syst_var_lst) + # Sum each systematic's contribtuions for all samples together (e.g. the ISR for all samples is summed linearly) p_arr_rel_lst = [] m_arr_rel_lst = [] for syst_name in syst_var_lst: + + if syst_name != "renormfact":continue + relevant_samples_lst = yt.get_cat_lables(base_histo.integrate("systematic",syst_name+"Up"), "sample") # The samples relevant to this syst + print("\t",syst_name) + print("\t",relevant_samples_lst) n_arr = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation u_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Up").values()[()] # Sum of all samples for up variation d_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Down").values()[()] # Sum of all samples for down variation + + print("n_arr",n_arr) + print("sum u",u_arr_sum) + print("sum d",d_arr_sum) + + # Uncorrelate renorm and fact across the processes (though leave processes in groups like dibosons correlated to be consistent with SR) + if syst_name == "renormfact": # TODO Change to renorm and fact once new pkl file is done + u_arr_sum = np.zeros_like(n_arr) # Initialize to 0 + d_arr_sum = np.zeros_like(n_arr) # Initialize to 0 + + #for proc_grp in CR_GRP_MAP.keys(): + for proc_grp in ["test"]: + #proc_lst = CR_GRP_MAP[proc_grp] + #if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renormfact not relevant here + #if proc_lst == []: continue + + proc_lst = relevant_samples_lst + + # We'll keep all signal processes as uncorrelated, as in SR + if proc_grp == "Signal": + for proc_name in proc_lst: + if proc_name not in relevant_samples_lst: continue + u_arr = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()] + d_arr = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()] + u_arr_sum += u_arr*u_arr + d_arr_sum += d_arr*d_arr + # Otherwise corrleated across groups (e.g. ZZ and WZ, as in SR) + else: + u_arr = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()] + d_arr = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()] + u_arr_sum += u_arr*u_arr + d_arr_sum += d_arr*d_arr + + # Before we move on, need to sqrt the outcome since later we'll square before adding in quadrature with other systs + print("pTHIS!",u_arr_sum) + print("pTHIS!",d_arr_sum) + u_arr_sum = np.sqrt(u_arr_sum) + d_arr_sum = np.sqrt(d_arr_sum) + print("THIS!",u_arr_sum) + print("THIS!",d_arr_sum) + + u_arr_rel = u_arr_sum - n_arr # Diff with respect to nominal d_arr_rel = d_arr_sum - n_arr # Diff with respect to nominal p_arr_rel = np.where(u_arr_rel>0,u_arr_rel,d_arr_rel) # Just the ones that increase the yield @@ -346,6 +397,8 @@ def get_shape_syst_arrs(base_histo): p_arr_rel_lst.append(p_arr_rel*p_arr_rel) # Square each element in the arr and append the arr to the out list m_arr_rel_lst.append(m_arr_rel*m_arr_rel) # Square each element in the arr and append the arr to the out list + print("done") + exit() return [sum(m_arr_rel_lst), sum(p_arr_rel_lst)] # Get the squared arr for the jet dependent syst (e.g. for diboson jet dependent syst) @@ -1011,10 +1064,10 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_ # Loop over hists and make plots skip_lst = [] # Skip these hists - #skip_wlst = ["njets"] # Skip all but these hists + skip_wlst = ["njets","lj0pt"] # Skip all but these hists for idx,var_name in enumerate(dict_of_hists.keys()): if (var_name in skip_lst): continue - #if (var_name not in skip_wlst): continue + if (var_name not in skip_wlst): continue if (var_name == "njets"): # We do not keep track of jets in the sparse axis for the njets hists cr_cat_dict = get_dict_with_stripped_bin_names(CR_CHAN_DICT,"njets") @@ -1037,6 +1090,7 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_ # Loop over the CR categories for hist_cat in cr_cat_dict.keys(): + if hist_cat != "cr_3l": continue if (hist_cat == "cr_2los_Z" and (("j0" in var_name) and ("lj0pt" not in var_name))): continue # The 2los Z category does not require jets (so leading jet plots do not make sense) if (hist_cat == "cr_2lss_flip" and (("j0" in var_name) and ("lj0pt" not in var_name))): continue # The flip category does not require jets (so leading jet plots do not make sense) print("\n\tCategory:",hist_cat) From dcf5dfe50b34d85650efcf7839225f89f77c05c7 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Sun, 18 Dec 2022 14:43:06 -0500 Subject: [PATCH 3/5] Relative error decorrelated across groups, except signal --- analysis/topEFT/make_cr_and_sr_plots.py | 109 +++++++++++++++--------- 1 file changed, 71 insertions(+), 38 deletions(-) diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py index 252721dc8..5b740de4c 100644 --- a/analysis/topEFT/make_cr_and_sr_plots.py +++ b/analysis/topEFT/make_cr_and_sr_plots.py @@ -340,64 +340,97 @@ def get_shape_syst_arrs(base_histo): m_arr_rel_lst = [] for syst_name in syst_var_lst: - if syst_name != "renormfact":continue - relevant_samples_lst = yt.get_cat_lables(base_histo.integrate("systematic",syst_name+"Up"), "sample") # The samples relevant to this syst + n_arr = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation + + #if syst_name != "renormfact":continue print("\t",syst_name) print("\t",relevant_samples_lst) - n_arr = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation - u_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Up").values()[()] # Sum of all samples for up variation - d_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Down").values()[()] # Sum of all samples for down variation - print("n_arr",n_arr) - print("sum u",u_arr_sum) - print("sum d",d_arr_sum) - - # Uncorrelate renorm and fact across the processes (though leave processes in groups like dibosons correlated to be consistent with SR) + # Special handling of renorm and fact + # Uncorrelate these systs across the processes (though leave processes in groups like dibosons correlated to be consistent with SR) if syst_name == "renormfact": # TODO Change to renorm and fact once new pkl file is done - u_arr_sum = np.zeros_like(n_arr) # Initialize to 0 - d_arr_sum = np.zeros_like(n_arr) # Initialize to 0 + a_arr_sum = np.zeros_like(n_arr) # Initialize to 0 + + for k,v in CR_GRP_MAP.items(): + print("\n",k) + for s in v: + if s in relevant_samples_lst: print(s) - #for proc_grp in CR_GRP_MAP.keys(): - for proc_grp in ["test"]: - #proc_lst = CR_GRP_MAP[proc_grp] - #if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renormfact not relevant here - #if proc_lst == []: continue + # Loop over the groups of processes, generally the processes in the groups will be correlated and the different groups will be uncorrelated + #for proc_grp in ["test"]: + for proc_grp in CR_GRP_MAP.keys(): + proc_lst = CR_GRP_MAP[proc_grp] + if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renormfact not relevant here + if proc_lst == []: continue - proc_lst = relevant_samples_lst + #proc_lst = relevant_samples_lst # We'll keep all signal processes as uncorrelated, as in SR if proc_grp == "Signal": for proc_name in proc_lst: if proc_name not in relevant_samples_lst: continue - u_arr = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()] - d_arr = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()] - u_arr_sum += u_arr*u_arr - d_arr_sum += d_arr*d_arr + + n_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic","nominal").values()[()] + u_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()] + d_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()] + + u_arr_proc_rel = u_arr_proc - n_arr_proc + d_arr_proc_rel = d_arr_proc - n_arr_proc + a_arr_proc_rel = (abs(u_arr_proc_rel) + abs(d_arr_proc_rel))/2.0 + + #print("\n\nAPPENDING!!!",proc_name,a_arr_proc_rel*a_arr_proc_rel) + print("\n\nAPPENDING!!!",proc_name,a_arr_proc_rel) + a_arr_sum += a_arr_proc_rel*a_arr_proc_rel + # Otherwise corrleated across groups (e.g. ZZ and WZ, as in SR) else: - u_arr = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()] - d_arr = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()] - u_arr_sum += u_arr*u_arr - d_arr_sum += d_arr*d_arr + n_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic","nominal").values()[()] + u_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()] + d_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()] + u_arr_grp_rel = u_arr_grp - n_arr_grp + d_arr_grp_rel = d_arr_grp - n_arr_grp + a_arr_grp_rel = (abs(u_arr_grp_rel) + abs(d_arr_grp_rel))/2.0 + + a_arr_sum += a_arr_grp_rel*a_arr_grp_rel # Before we move on, need to sqrt the outcome since later we'll square before adding in quadrature with other systs - print("pTHIS!",u_arr_sum) - print("pTHIS!",d_arr_sum) - u_arr_sum = np.sqrt(u_arr_sum) - d_arr_sum = np.sqrt(d_arr_sum) - print("THIS!",u_arr_sum) - print("THIS!",d_arr_sum) - - - u_arr_rel = u_arr_sum - n_arr # Diff with respect to nominal - d_arr_rel = d_arr_sum - n_arr # Diff with respect to nominal - p_arr_rel = np.where(u_arr_rel>0,u_arr_rel,d_arr_rel) # Just the ones that increase the yield - m_arr_rel = np.where(u_arr_rel<0,u_arr_rel,d_arr_rel) # Just the ones that decrease the yield + p_arr_rel = np.sqrt(a_arr_sum) + m_arr_rel = -np.sqrt(a_arr_sum) + + + # If the syst is not renorm or fact, just treat it normally (correlate across all processes) + else: + u_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Up").values()[()] # Sum of all samples for up variation + d_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Down").values()[()] # Sum of all samples for down variation + + u_arr_rel = u_arr_sum - n_arr # Diff with respect to nominal + d_arr_rel = d_arr_sum - n_arr # Diff with respect to nominal + p_arr_rel = np.where(u_arr_rel>0,u_arr_rel,d_arr_rel) # Just the ones that increase the yield + m_arr_rel = np.where(u_arr_rel<0,u_arr_rel,d_arr_rel) # Just the ones that decrease the yield + print("n_arr",n_arr) + print("u_arr_sum",u_arr_sum) + print("d_arr_sum",d_arr_sum) + print("m rel",m_arr_rel) + print("p rel",p_arr_rel) + + print("\nEND:") + print("p_arr_rel",p_arr_rel) + print("m_arr_rel",m_arr_rel) + + # Square and append this syst to the return lists p_arr_rel_lst.append(p_arr_rel*p_arr_rel) # Square each element in the arr and append the arr to the out list m_arr_rel_lst.append(m_arr_rel*m_arr_rel) # Square each element in the arr and append the arr to the out list print("done") + + print("\np_arr_rel_lst",p_arr_rel_lst) + print("\nm_arr_rel_lst",m_arr_rel_lst) + + print("\n\nRETURNING") + print("\np",sum(p_arr_rel_lst)) + print("\nm",sum(m_arr_rel_lst)) + exit() return [sum(m_arr_rel_lst), sum(p_arr_rel_lst)] From 9fd8cab719a5bab4dcdf874a03788c188f0e7232 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Sun, 18 Dec 2022 15:08:18 -0500 Subject: [PATCH 4/5] Clean up and move new renorm fact stuff to a function --- analysis/topEFT/make_cr_and_sr_plots.py | 130 +++++++++++------------- 1 file changed, 61 insertions(+), 69 deletions(-) diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py index 5b740de4c..51a5bb04a 100644 --- a/analysis/topEFT/make_cr_and_sr_plots.py +++ b/analysis/topEFT/make_cr_and_sr_plots.py @@ -321,8 +321,6 @@ def get_rate_syst_arrs(base_histo,proc_group_map): # Wrapper for getting plus and minus shape arrs def get_shape_syst_arrs(base_histo): - print("\n\nHere in get_shape_syst_arrs") - # Get the list of systematic base names (i.e. without the up and down tags) # Assumes each syst has a "systnameUp" and a "systnameDown" category on the systematic axis syst_var_lst = [] @@ -333,71 +331,18 @@ def get_shape_syst_arrs(base_histo): if syst_name_base not in syst_var_lst: syst_var_lst.append(syst_name_base) - print("syst_var_lst",syst_var_lst) - # Sum each systematic's contribtuions for all samples together (e.g. the ISR for all samples is summed linearly) p_arr_rel_lst = [] m_arr_rel_lst = [] for syst_name in syst_var_lst: relevant_samples_lst = yt.get_cat_lables(base_histo.integrate("systematic",syst_name+"Up"), "sample") # The samples relevant to this syst - n_arr = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation - - #if syst_name != "renormfact":continue - print("\t",syst_name) - print("\t",relevant_samples_lst) + n_arr = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation # Special handling of renorm and fact # Uncorrelate these systs across the processes (though leave processes in groups like dibosons correlated to be consistent with SR) if syst_name == "renormfact": # TODO Change to renorm and fact once new pkl file is done - a_arr_sum = np.zeros_like(n_arr) # Initialize to 0 - - for k,v in CR_GRP_MAP.items(): - print("\n",k) - for s in v: - if s in relevant_samples_lst: print(s) - - # Loop over the groups of processes, generally the processes in the groups will be correlated and the different groups will be uncorrelated - #for proc_grp in ["test"]: - for proc_grp in CR_GRP_MAP.keys(): - proc_lst = CR_GRP_MAP[proc_grp] - if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renormfact not relevant here - if proc_lst == []: continue - - #proc_lst = relevant_samples_lst - - # We'll keep all signal processes as uncorrelated, as in SR - if proc_grp == "Signal": - for proc_name in proc_lst: - if proc_name not in relevant_samples_lst: continue - - n_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic","nominal").values()[()] - u_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()] - d_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()] - - u_arr_proc_rel = u_arr_proc - n_arr_proc - d_arr_proc_rel = d_arr_proc - n_arr_proc - a_arr_proc_rel = (abs(u_arr_proc_rel) + abs(d_arr_proc_rel))/2.0 - - #print("\n\nAPPENDING!!!",proc_name,a_arr_proc_rel*a_arr_proc_rel) - print("\n\nAPPENDING!!!",proc_name,a_arr_proc_rel) - a_arr_sum += a_arr_proc_rel*a_arr_proc_rel - - # Otherwise corrleated across groups (e.g. ZZ and WZ, as in SR) - else: - n_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic","nominal").values()[()] - u_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()] - d_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()] - u_arr_grp_rel = u_arr_grp - n_arr_grp - d_arr_grp_rel = d_arr_grp - n_arr_grp - a_arr_grp_rel = (abs(u_arr_grp_rel) + abs(d_arr_grp_rel))/2.0 - - a_arr_sum += a_arr_grp_rel*a_arr_grp_rel - - # Before we move on, need to sqrt the outcome since later we'll square before adding in quadrature with other systs - p_arr_rel = np.sqrt(a_arr_sum) - m_arr_rel = -np.sqrt(a_arr_sum) - + p_arr_rel,m_arr_rel = get_decorrelated_uncty(syst_name,CR_GRP_MAP,relevant_samples_lst,base_histo,n_arr) # If the syst is not renorm or fact, just treat it normally (correlate across all processes) else: @@ -408,22 +353,11 @@ def get_shape_syst_arrs(base_histo): d_arr_rel = d_arr_sum - n_arr # Diff with respect to nominal p_arr_rel = np.where(u_arr_rel>0,u_arr_rel,d_arr_rel) # Just the ones that increase the yield m_arr_rel = np.where(u_arr_rel<0,u_arr_rel,d_arr_rel) # Just the ones that decrease the yield - print("n_arr",n_arr) - print("u_arr_sum",u_arr_sum) - print("d_arr_sum",d_arr_sum) - print("m rel",m_arr_rel) - print("p rel",p_arr_rel) - - print("\nEND:") - print("p_arr_rel",p_arr_rel) - print("m_arr_rel",m_arr_rel) # Square and append this syst to the return lists p_arr_rel_lst.append(p_arr_rel*p_arr_rel) # Square each element in the arr and append the arr to the out list m_arr_rel_lst.append(m_arr_rel*m_arr_rel) # Square each element in the arr and append the arr to the out list - print("done") - print("\np_arr_rel_lst",p_arr_rel_lst) print("\nm_arr_rel_lst",m_arr_rel_lst) @@ -431,9 +365,67 @@ def get_shape_syst_arrs(base_histo): print("\np",sum(p_arr_rel_lst)) print("\nm",sum(m_arr_rel_lst)) - exit() return [sum(m_arr_rel_lst), sum(p_arr_rel_lst)] + +# Special case for renorm and fact, as these are decorrelated across processes +# Sorry to anyone who tries to read this in the future, this function is very ad hoc and messy and hard to follow +# Just used in get_shape_syst_arrs() +# Here are a few notes: +# - This is complicated, so I just symmetrized the errors +# - The processes are generally correlated across groups (e.g. WZ and ZZ) since this is what's done in the datacard maker for the SR +# - So the grouping generally follows what's in the CR group map, excetp in the case of signal +# - Since in the SR all signal processes are uncorrelated for these systs, we also uncorrelate here +# - Note there are caviats to this: +# * In the SR, TTZToLL_M1to10 and TTToSemiLeptonic and TTTo2L2Nu are all grouped into ttll +# * Here in the CR TTZToLL_M1to10 is part of signal group, but TTToSemiLeptonic and TTTo2L2Nu are in their own ttbar group +# * So there are two differences with respect to how these processes are grouped in the SR: +# 1) Here TTToSemiLeptonic and TTTo2L2Nu are correlated with each other, but not with ttll +# 2) Here TTZToLL_M1to10 is grouped as part of signal (as in SR) but here _all_ signal processes are uncorrleated so here TTZToLL_M1to10 is uncorrelated with ttll while in SR they would be correlated +def get_decorrelated_uncty(syst_name,grp_map,relevant_samples_lst,base_histo,template_zeros_arr): + + # Initialize the array we will return (ok technically we return sqrt of this arr squared..) + a_arr_sum = np.zeros_like(template_zeros_arr) # Just using this template_zeros_arr for its size + + # Loop over the groups of processes, generally the processes in the groups will be correlated and the different groups will be uncorrelated + for proc_grp in grp_map.keys(): + proc_lst = grp_map[proc_grp] + if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renorm and fact not relevant here + if proc_lst == []: continue # Nothing here + + # We'll keep all signal processes as uncorrelated, similar to what's done in SR + if proc_grp == "Signal": + for proc_name in proc_lst: + if proc_name not in relevant_samples_lst: continue + + n_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic","nominal").values()[()] + u_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()] + d_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()] + + u_arr_proc_rel = u_arr_proc - n_arr_proc + d_arr_proc_rel = d_arr_proc - n_arr_proc + a_arr_proc_rel = (abs(u_arr_proc_rel) + abs(d_arr_proc_rel))/2.0 + + a_arr_sum += a_arr_proc_rel*a_arr_proc_rel + + # Otherwise corrleated across groups (e.g. ZZ and WZ, as datacard maker does in SR) + else: + n_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic","nominal").values()[()] + u_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()] + d_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()] + u_arr_grp_rel = u_arr_grp - n_arr_grp + d_arr_grp_rel = d_arr_grp - n_arr_grp + a_arr_grp_rel = (abs(u_arr_grp_rel) + abs(d_arr_grp_rel))/2.0 + + a_arr_sum += a_arr_grp_rel*a_arr_grp_rel + + # Before we move on, need to sqrt the outcome since later we'll square before adding in quadrature with other systs + p_arr_rel = np.sqrt(a_arr_sum) + m_arr_rel = -np.sqrt(a_arr_sum) + + return [p_arr_rel,m_arr_rel] + + # Get the squared arr for the jet dependent syst (e.g. for diboson jet dependent syst) def get_diboson_njets_syst_arr(njets_histo_vals_arr,bin0_njets): From f017a99be9bb23c53852267d6847a63be905cc39 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Sun, 18 Dec 2022 15:41:13 -0500 Subject: [PATCH 5/5] Clean up --- analysis/topEFT/make_cr_and_sr_plots.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py index 51a5bb04a..bca542ea1 100644 --- a/analysis/topEFT/make_cr_and_sr_plots.py +++ b/analysis/topEFT/make_cr_and_sr_plots.py @@ -335,13 +335,15 @@ def get_shape_syst_arrs(base_histo): p_arr_rel_lst = [] m_arr_rel_lst = [] for syst_name in syst_var_lst: + # Skip the variation of renorm and fact together, since we're treating as independent + if syst_name == "renormfact": continue relevant_samples_lst = yt.get_cat_lables(base_histo.integrate("systematic",syst_name+"Up"), "sample") # The samples relevant to this syst n_arr = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation # Special handling of renorm and fact # Uncorrelate these systs across the processes (though leave processes in groups like dibosons correlated to be consistent with SR) - if syst_name == "renormfact": # TODO Change to renorm and fact once new pkl file is done + if (syst_name == "renorm") or (syst_name == "fact"): p_arr_rel,m_arr_rel = get_decorrelated_uncty(syst_name,CR_GRP_MAP,relevant_samples_lst,base_histo,n_arr) # If the syst is not renorm or fact, just treat it normally (correlate across all processes) @@ -358,13 +360,6 @@ def get_shape_syst_arrs(base_histo): p_arr_rel_lst.append(p_arr_rel*p_arr_rel) # Square each element in the arr and append the arr to the out list m_arr_rel_lst.append(m_arr_rel*m_arr_rel) # Square each element in the arr and append the arr to the out list - print("\np_arr_rel_lst",p_arr_rel_lst) - print("\nm_arr_rel_lst",m_arr_rel_lst) - - print("\n\nRETURNING") - print("\np",sum(p_arr_rel_lst)) - print("\nm",sum(m_arr_rel_lst)) - return [sum(m_arr_rel_lst), sum(p_arr_rel_lst)] @@ -374,9 +369,9 @@ def get_shape_syst_arrs(base_histo): # Here are a few notes: # - This is complicated, so I just symmetrized the errors # - The processes are generally correlated across groups (e.g. WZ and ZZ) since this is what's done in the datacard maker for the SR -# - So the grouping generally follows what's in the CR group map, excetp in the case of signal +# - So the grouping generally follows what's in the CR group map, except in the case of signal # - Since in the SR all signal processes are uncorrelated for these systs, we also uncorrelate here -# - Note there are caviats to this: +# - Note there are caveats to this: # * In the SR, TTZToLL_M1to10 and TTToSemiLeptonic and TTTo2L2Nu are all grouped into ttll # * Here in the CR TTZToLL_M1to10 is part of signal group, but TTToSemiLeptonic and TTTo2L2Nu are in their own ttbar group # * So there are two differences with respect to how these processes are grouped in the SR: @@ -1089,10 +1084,10 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_ # Loop over hists and make plots skip_lst = [] # Skip these hists - skip_wlst = ["njets","lj0pt"] # Skip all but these hists + #skip_wlst = ["njets"] # Skip all but these hists for idx,var_name in enumerate(dict_of_hists.keys()): if (var_name in skip_lst): continue - if (var_name not in skip_wlst): continue + #if (var_name not in skip_wlst): continue if (var_name == "njets"): # We do not keep track of jets in the sparse axis for the njets hists cr_cat_dict = get_dict_with_stripped_bin_names(CR_CHAN_DICT,"njets") @@ -1115,7 +1110,6 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_ # Loop over the CR categories for hist_cat in cr_cat_dict.keys(): - if hist_cat != "cr_3l": continue if (hist_cat == "cr_2los_Z" and (("j0" in var_name) and ("lj0pt" not in var_name))): continue # The 2los Z category does not require jets (so leading jet plots do not make sense) if (hist_cat == "cr_2lss_flip" and (("j0" in var_name) and ("lj0pt" not in var_name))): continue # The flip category does not require jets (so leading jet plots do not make sense) print("\n\tCategory:",hist_cat)