From 8ff77fb162fc246cd2cb2656c1048c02bc78e3c4 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Mon, 23 Dec 2024 12:17:37 -0800 Subject: [PATCH 1/4] Restore code to pre-zeroize-low-mc --- analysis/wwz/make_datacards.py | 24 ++++++++---------------- ewkcoffea/modules/yield_tools.py | 9 +-------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/analysis/wwz/make_datacards.py b/analysis/wwz/make_datacards.py index 8c61c83..6e6c12e 100644 --- a/analysis/wwz/make_datacards.py +++ b/analysis/wwz/make_datacards.py @@ -259,21 +259,13 @@ def handle_negatives(in_dict,zero_low_mc): val = in_dict[cat]["nominal"][proc][0] var = in_dict[cat]["nominal"][proc][1] if val <= 0: - if zero_low_mc: - print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and setting variations to 1/1.") - out_dict[cat]["nominal"][proc][0] = SMALL - out_dict[cat]["nominal"][proc][1] = 0 - for syst in out_dict[cat]: - if syst == "nominal": continue # Already handled this one - out_dict[cat][syst][proc][0] = SMALL - else: - print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.") - out_dict[cat]["nominal"][proc][0] = SMALL - out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2 - for syst in out_dict[cat]: - if syst == "nominal": continue # Already handled this one - syst_var_orig = out_dict[cat][syst][proc][0] # Dont bother messsing with mc stat error on the syst variation - out_dict[cat][syst][proc][0] = (syst_var_orig - val) + SMALL # Center around SMALL + print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.") + out_dict[cat]["nominal"][proc][0] = SMALL + out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2 + for syst in out_dict[cat]: + if syst == "nominal": continue # Already handled this one + syst_var_orig = 
out_dict[cat][syst][proc][0] # Dont bother messsing with mc stat error on the syst variation + out_dict[cat][syst][proc][0] = (syst_var_orig - val) + SMALL # Center around SMALL return out_dict @@ -604,7 +596,7 @@ def main(): # Get yield dictionary (nested in the order: year,cat,syst,proc) yld_dict_mc_allyears = {} for year in sample_names_dict_mc: - yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year],zero_low_mc = zero_low_mc) + yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year]) if do_nuis: handle_per_year_systs_for_fr(yld_dict_mc_allyears,run,do_jec27) diff --git a/ewkcoffea/modules/yield_tools.py b/ewkcoffea/modules/yield_tools.py index eba9520..1dc2f4d 100644 --- a/ewkcoffea/modules/yield_tools.py +++ b/ewkcoffea/modules/yield_tools.py @@ -34,7 +34,7 @@ def valvar_op(valvar_1, valvar_2, op): ############## Getting yields from a histo ############## # Get the yields (nested in the order: year,cat,syst,proc) -def get_yields(histo,sample_dict,blind=True,zero_low_mc = False,systematic_name=None): +def get_yields(histo,sample_dict,blind=True,systematic_name=None): yld_dict = {} @@ -57,13 +57,6 @@ def get_yields(histo,sample_dict,blind=True,zero_low_mc = False,systematic_name= val = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].values(flow=True))) var = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].variances(flow=True))) - # Optionally zero out bins that are consistent with zero in the nominal - # Note we do not do this for data - if zero_low_mc and ("data" not in proc_name): - if (np.sqrt(var_n) >= val_n): - val = 0 - var = 0 - yld_dict[cat_name][syst_name][proc_name] = [val,var] return yld_dict From c57eeca39b268c01c9070ce5641c59362ef14f70 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Mon, 30 Dec 2024 12:53:47 -0800 Subject: [PATCH 2/4] Update datacard maker with method to zeroize low mc stats ylds --- 
analysis/wwz/make_datacards.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/analysis/wwz/make_datacards.py b/analysis/wwz/make_datacards.py index 6e6c12e..463e903 100644 --- a/analysis/wwz/make_datacards.py +++ b/analysis/wwz/make_datacards.py @@ -258,14 +258,30 @@ def handle_negatives(in_dict,zero_low_mc): for proc in in_dict[cat]["nominal"]: val = in_dict[cat]["nominal"][proc][0] var = in_dict[cat]["nominal"][proc][1] - if val <= 0: - print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.") - out_dict[cat]["nominal"][proc][0] = SMALL - out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2 - for syst in out_dict[cat]: - if syst == "nominal": continue # Already handled this one - syst_var_orig = out_dict[cat][syst][proc][0] # Dont bother messsing with mc stat error on the syst variation - out_dict[cat][syst][proc][0] = (syst_var_orig - val) + SMALL # Center around SMALL + + # Kill contributions that are consistent with 0 + if zero_low_mc: + if val <= np.sqrt(var): + if "data" in proc: + # This function is only used on MC, make sure no data (especially since we do not want to zeroize any data bins) + raise Exception("This function does not expect data. 
Something went wrong.") + print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is smaller than MC stat (yield {val}, and mc stat {var**0.5}), replacing with {SMALL} and killing up/down systematic variations.") + out_dict[cat]["nominal"][proc][0] = SMALL + out_dict[cat]["nominal"][proc][1] = 0 + for syst in out_dict[cat]: + if syst == "nominal": continue # Already handled this one + out_dict[cat][syst][proc][0] = SMALL + + # If not killing contributions that are consistent with 0, just replace 0 and negative contributions with SMALL + else: + if val <= 0: + print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.") + out_dict[cat]["nominal"][proc][0] = SMALL + out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2 + for syst in out_dict[cat]: + if syst == "nominal": continue # Already handled this one + syst_var_orig = out_dict[cat][syst][proc][0] # Dont bother messsing with mc stat error on the syst variation + out_dict[cat][syst][proc][0] = (syst_var_orig - val) + SMALL # Center around SMALL return out_dict From aaea6341e4dc8c65afdaddcfa061db8c0cf603a6 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Tue, 7 Jan 2025 08:18:33 -0800 Subject: [PATCH 3/4] Update syst names in script for getting avg syst size from cards --- analysis/wwz/parse_datacards.py | 141 +++++++++++++++++++++----------- 1 file changed, 95 insertions(+), 46 deletions(-) diff --git a/analysis/wwz/parse_datacards.py b/analysis/wwz/parse_datacards.py index d9fa88c..8da1bc2 100644 --- a/analysis/wwz/parse_datacards.py +++ b/analysis/wwz/parse_datacards.py @@ -36,64 +36,113 @@ SYST_GRP = { "run2" : { - "pu" : ['PU'], - "prefire" : ['PreFiring'], - "scale" : ['renorm', 'fact'], - "ps": ['ISR', 'FSR',], "btag" : [ - 'btagSFbc_correlated', - 'btagSFlight_correlated', - 'btagSFbc_uncorrelated_2018', - 'btagSFlight_uncorrelated_2018', - 'btagSFbc_uncorrelated_2016APV', - 
'btagSFlight_uncorrelated_2016APV', - 'btagSFbc_uncorrelated_2017', - 'btagSFlight_uncorrelated_2017', - 'btagSFbc_uncorrelated_2016', - 'btagSFlight_uncorrelated_2016', + "CMS_btag_fixedWP_comb_bc_correlated", + "CMS_btag_fixedWP_comb_bc_uncorrelated_2016postVFP", + "CMS_btag_fixedWP_comb_bc_uncorrelated_2016preVFP", + "CMS_btag_fixedWP_comb_bc_uncorrelated_2017", + "CMS_btag_fixedWP_comb_bc_uncorrelated_2018", + "CMS_btag_fixedWP_incl_light_correlated", + "CMS_btag_fixedWP_incl_light_uncorrelated_2016postVFP", + "CMS_btag_fixedWP_incl_light_uncorrelated_2016preVFP", + "CMS_btag_fixedWP_incl_light_uncorrelated_2017", + "CMS_btag_fixedWP_incl_light_uncorrelated_2018", ], - "ele" : ['lepSF_elec_run2'], - "mu" : ['lepSF_muon_run2'], + "ele" : ["CMS_eff_e_13TeV",], + "muo" : ["CMS_eff_m_13TeV",], + "prefire" : ["CMS_l1_ecal_prefiring",], + "pu" : ["CMS_pileup",], "jerc" : [ - 'JEC_2018', - 'JER_2018', - 'JEC_2017', - 'JER_2017', - 'JEC_2016', - 'JER_2016', - 'JEC_2016APV', - 'JER_2016APV', + "CMS_res_j_2016postVFP", + "CMS_res_j_2016preVFP", + "CMS_res_j_2017", + "CMS_res_j_2018", + "CMS_scale_j_2016postVFP", + "CMS_scale_j_2016preVFP", + "CMS_scale_j_2017", + "CMS_scale_j_2018", ], - #['lumi'], - #['theory_norm_other_other'], - #['fake_WZ_run2'] + "met" : [ + "CMS_scale_met_unclustered_energy_2016postVFP", + "CMS_scale_met_unclustered_energy_2016preVFP", + "CMS_scale_met_unclustered_energy_2017", + "CMS_scale_met_unclustered_energy_2018", + ], + "renormfact" : [ + "QCDscale_fac_WWZ", + "QCDscale_fac_WZ", + "QCDscale_fac_ZH", + "QCDscale_fac_ZZ", + "QCDscale_fac_other", + "QCDscale_fac_tWZ", + "QCDscale_fac_ttZ", + "QCDscale_ren_WWZ", + "QCDscale_ren_WZ", + "QCDscale_ren_ZH", + "QCDscale_ren_ZZ", + "QCDscale_ren_other", + "QCDscale_ren_tWZ", + "QCDscale_ren_ttZ", + ], + "ps" : [ + "ps_fsr_WWZ", "ps_fsr_ZH", "ps_fsr_ZZ", "ps_fsr_ttZ", "ps_fsr_tWZ", "ps_fsr_WZ", "ps_fsr_other", + "ps_isr_WWZ", "ps_isr_ZH", "ps_isr_ZZ", "ps_isr_ttZ", "ps_isr_tWZ", "ps_isr_WZ", 
"ps_isr_other", + ], + }, "run3" : { - "pu" : ['PU'], - "scale" : ['renorm', 'fact'], - "ps": ['ISR', 'FSR',], + "btag" : [ - 'btagSFbc_correlated', - 'btagSFlight_correlated', - 'btagSFbc_uncorrelated_2022', - 'btagSFbc_uncorrelated_2022EE', - 'btagSFbc_uncorrelated_2023', - 'btagSFbc_uncorrelated_2023BPix', + "CMS_btag_fixedWP_comb_bc_correlated", + "CMS_btag_fixedWP_comb_bc_uncorrelated_2022", + "CMS_btag_fixedWP_comb_bc_uncorrelated_2022EE", + "CMS_btag_fixedWP_comb_bc_uncorrelated_2023", + "CMS_btag_fixedWP_comb_bc_uncorrelated_2023BPix", + "CMS_btag_fixedWP_incl_light_correlated", ], - "ele" : ['lepSF_elec_run3'], - "mu" : ['lepSF_muon_run3'], + "ele" : ["CMS_eff_e_13p6TeV",], + "muo" : ["CMS_eff_m_13p6TeV",], + "pu" : ["CMS_pileup",], "jerc" : [ - 'JEC_2022', - 'JER_2022', - 'JEC_2022EE', - 'JER_2022EE', - 'JEC_2023', - 'JER_2023', - 'JEC_2023BPix', - 'JER_2023BPix', + "CMS_res_j_2022", + "CMS_res_j_2022EE", + "CMS_res_j_2023", + "CMS_res_j_2023BPix", + "CMS_scale_j_2022", + "CMS_scale_j_2022EE", + "CMS_scale_j_2023", + "CMS_scale_j_2023BPix", + ], + "met" : [ + "CMS_scale_met_unclustered_energy_2022", + "CMS_scale_met_unclustered_energy_2022EE", + "CMS_scale_met_unclustered_energy_2023", + "CMS_scale_met_unclustered_energy_2023BPix", + ], + + "renormfact" : [ + "QCDscale_fac_WWZ", + "QCDscale_fac_WZ", + "QCDscale_fac_ZH", + "QCDscale_fac_ZZ", + "QCDscale_fac_other", + "QCDscale_fac_tWZ", + "QCDscale_fac_ttZ", + "QCDscale_ren_WWZ", + "QCDscale_ren_WZ", + "QCDscale_ren_ZH", + "QCDscale_ren_ZZ", + "QCDscale_ren_other", + "QCDscale_ren_tWZ", + "QCDscale_ren_ttZ", + ], + "ps" : [ + "ps_fsr_WWZ", "ps_fsr_ZH", "ps_fsr_ZZ", "ps_fsr_ttZ", "ps_fsr_tWZ", "ps_fsr_WZ", "ps_fsr_other", + "ps_isr_WWZ", "ps_isr_ZH", "ps_isr_ZZ", "ps_isr_ttZ", "ps_isr_tWZ", "ps_isr_WZ", "ps_isr_other", ], } + } From 3b32d11665c97d934fef3a8ebab62292c7cd9911 Mon Sep 17 00:00:00 2001 From: Kelci Mohrman Date: Tue, 7 Jan 2025 08:20:14 -0800 Subject: [PATCH 4/4] Tweaks to commented processor 
print statements --- analysis/wwz/wwz4l.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/analysis/wwz/wwz4l.py b/analysis/wwz/wwz4l.py index 6e9484c..b97ebb6 100644 --- a/analysis/wwz/wwz4l.py +++ b/analysis/wwz/wwz4l.py @@ -1236,10 +1236,12 @@ def process(self, events): if isData: cuts_lst.append("is_good_lumi") # Apply golden json requirements if this is data all_cuts_mask = selections.all(*cuts_lst) + # Print info about the events + #import sys #run = events.run[all_cuts_mask] #luminosityBlock = events.luminosityBlock[all_cuts_mask] #event = events.event[all_cuts_mask] - #w = weights[all_cuts_mask] + #w = weight[all_cuts_mask] #if dense_axis_name == "njets": # print("\nSTARTPRINT") # for i,j in enumerate(w):