Skip to content

Commit

Permalink
Merge pull request #76 from cmstas/fix_zeroize
Browse files Browse the repository at this point in the history
New implementation of low-mc-stats zeroization
  • Loading branch information
kmohrman authored Jan 7, 2025
2 parents 959b0d1 + 3b32d11 commit 3639772
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 60 deletions.
18 changes: 13 additions & 5 deletions analysis/wwz/make_datacards.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,23 @@ def handle_negatives(in_dict,zero_low_mc):
for proc in in_dict[cat]["nominal"]:
val = in_dict[cat]["nominal"][proc][0]
var = in_dict[cat]["nominal"][proc][1]
if val <= 0:
if zero_low_mc:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and setting variations to 1/1.")

# Kill contributions that are consistent with 0
if zero_low_mc:
if val <= np.sqrt(var):
if "data" in proc:
# This function is only used on MC, make sure no data (especially since we do not want to zeroize any data bins)
raise Exception("This function does not expect data. Something went wrong.")
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is smaller than MC stat (yield {val}, and mc stat {var**0.5}), replacing with {SMALL} and killing up/down systematic variations.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = 0
for syst in out_dict[cat]:
if syst == "nominal": continue # Already handled this one
out_dict[cat][syst][proc][0] = SMALL
else:

# If not killing contributions that are consistent with 0, just replace 0 and negaive contributions with SMALL
else:
if val <= 0:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2
Expand Down Expand Up @@ -604,7 +612,7 @@ def main():
# Get yield dictionary (nested in the order: year,cat,syst,proc)
yld_dict_mc_allyears = {}
for year in sample_names_dict_mc:
yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year],zero_low_mc = zero_low_mc)
yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year])
if do_nuis:
handle_per_year_systs_for_fr(yld_dict_mc_allyears,run,do_jec27)

Expand Down
141 changes: 95 additions & 46 deletions analysis/wwz/parse_datacards.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,64 +36,113 @@
SYST_GRP = {

"run2" : {
"pu" : ['PU'],
"prefire" : ['PreFiring'],
"scale" : ['renorm', 'fact'],
"ps": ['ISR', 'FSR',],
"btag" : [
'btagSFbc_correlated',
'btagSFlight_correlated',
'btagSFbc_uncorrelated_2018',
'btagSFlight_uncorrelated_2018',
'btagSFbc_uncorrelated_2016APV',
'btagSFlight_uncorrelated_2016APV',
'btagSFbc_uncorrelated_2017',
'btagSFlight_uncorrelated_2017',
'btagSFbc_uncorrelated_2016',
'btagSFlight_uncorrelated_2016',
"CMS_btag_fixedWP_comb_bc_correlated",
"CMS_btag_fixedWP_comb_bc_uncorrelated_2016postVFP",
"CMS_btag_fixedWP_comb_bc_uncorrelated_2016preVFP",
"CMS_btag_fixedWP_comb_bc_uncorrelated_2017",
"CMS_btag_fixedWP_comb_bc_uncorrelated_2018",
"CMS_btag_fixedWP_incl_light_correlated",
"CMS_btag_fixedWP_incl_light_uncorrelated_2016postVFP",
"CMS_btag_fixedWP_incl_light_uncorrelated_2016preVFP",
"CMS_btag_fixedWP_incl_light_uncorrelated_2017",
"CMS_btag_fixedWP_incl_light_uncorrelated_2018",
],
"ele" : ['lepSF_elec_run2'],
"mu" : ['lepSF_muon_run2'],
"ele" : ["CMS_eff_e_13TeV",],
"muo" : ["CMS_eff_m_13TeV",],
"prefire" : ["CMS_l1_ecal_prefiring",],
"pu" : ["CMS_pileup",],
"jerc" : [
'JEC_2018',
'JER_2018',
'JEC_2017',
'JER_2017',
'JEC_2016',
'JER_2016',
'JEC_2016APV',
'JER_2016APV',
"CMS_res_j_2016postVFP",
"CMS_res_j_2016preVFP",
"CMS_res_j_2017",
"CMS_res_j_2018",
"CMS_scale_j_2016postVFP",
"CMS_scale_j_2016preVFP",
"CMS_scale_j_2017",
"CMS_scale_j_2018",
],
#['lumi'],
#['theory_norm_other_other'],
#['fake_WZ_run2']
"met" : [
"CMS_scale_met_unclustered_energy_2016postVFP",
"CMS_scale_met_unclustered_energy_2016preVFP",
"CMS_scale_met_unclustered_energy_2017",
"CMS_scale_met_unclustered_energy_2018",
],
"renormfact" : [
"QCDscale_fac_WWZ",
"QCDscale_fac_WZ",
"QCDscale_fac_ZH",
"QCDscale_fac_ZZ",
"QCDscale_fac_other",
"QCDscale_fac_tWZ",
"QCDscale_fac_ttZ",
"QCDscale_ren_WWZ",
"QCDscale_ren_WZ",
"QCDscale_ren_ZH",
"QCDscale_ren_ZZ",
"QCDscale_ren_other",
"QCDscale_ren_tWZ",
"QCDscale_ren_ttZ",
],
"ps" : [
"ps_fsr_WWZ", "ps_fsr_ZH", "ps_fsr_ZZ", "ps_fsr_ttZ", "ps_fsr_tWZ", "ps_fsr_WZ", "ps_fsr_other",
"ps_isr_WWZ", "ps_isr_ZH", "ps_isr_ZZ", "ps_isr_ttZ", "ps_isr_tWZ", "ps_isr_WZ", "ps_isr_other",
],

},

"run3" : {
"pu" : ['PU'],
"scale" : ['renorm', 'fact'],
"ps": ['ISR', 'FSR',],

"btag" : [
'btagSFbc_correlated',
'btagSFlight_correlated',
'btagSFbc_uncorrelated_2022',
'btagSFbc_uncorrelated_2022EE',
'btagSFbc_uncorrelated_2023',
'btagSFbc_uncorrelated_2023BPix',
"CMS_btag_fixedWP_comb_bc_correlated",
"CMS_btag_fixedWP_comb_bc_uncorrelated_2022",
"CMS_btag_fixedWP_comb_bc_uncorrelated_2022EE",
"CMS_btag_fixedWP_comb_bc_uncorrelated_2023",
"CMS_btag_fixedWP_comb_bc_uncorrelated_2023BPix",
"CMS_btag_fixedWP_incl_light_correlated",
],
"ele" : ['lepSF_elec_run3'],
"mu" : ['lepSF_muon_run3'],
"ele" : ["CMS_eff_e_13p6TeV",],
"muo" : ["CMS_eff_m_13p6TeV",],
"pu" : ["CMS_pileup",],
"jerc" : [
'JEC_2022',
'JER_2022',
'JEC_2022EE',
'JER_2022EE',
'JEC_2023',
'JER_2023',
'JEC_2023BPix',
'JER_2023BPix',
"CMS_res_j_2022",
"CMS_res_j_2022EE",
"CMS_res_j_2023",
"CMS_res_j_2023BPix",
"CMS_scale_j_2022",
"CMS_scale_j_2022EE",
"CMS_scale_j_2023",
"CMS_scale_j_2023BPix",
],
"met" : [
"CMS_scale_met_unclustered_energy_2022",
"CMS_scale_met_unclustered_energy_2022EE",
"CMS_scale_met_unclustered_energy_2023",
"CMS_scale_met_unclustered_energy_2023BPix",
],

"renormfact" : [
"QCDscale_fac_WWZ",
"QCDscale_fac_WZ",
"QCDscale_fac_ZH",
"QCDscale_fac_ZZ",
"QCDscale_fac_other",
"QCDscale_fac_tWZ",
"QCDscale_fac_ttZ",
"QCDscale_ren_WWZ",
"QCDscale_ren_WZ",
"QCDscale_ren_ZH",
"QCDscale_ren_ZZ",
"QCDscale_ren_other",
"QCDscale_ren_tWZ",
"QCDscale_ren_ttZ",
],
"ps" : [
"ps_fsr_WWZ", "ps_fsr_ZH", "ps_fsr_ZZ", "ps_fsr_ttZ", "ps_fsr_tWZ", "ps_fsr_WZ", "ps_fsr_other",
"ps_isr_WWZ", "ps_isr_ZH", "ps_isr_ZZ", "ps_isr_ttZ", "ps_isr_tWZ", "ps_isr_WZ", "ps_isr_other",
],
}

}


Expand Down
4 changes: 3 additions & 1 deletion analysis/wwz/wwz4l.py
Original file line number Diff line number Diff line change
Expand Up @@ -1236,10 +1236,12 @@ def process(self, events):
if isData: cuts_lst.append("is_good_lumi") # Apply golden json requirements if this is data
all_cuts_mask = selections.all(*cuts_lst)

# Print info about the events
#import sys
#run = events.run[all_cuts_mask]
#luminosityBlock = events.luminosityBlock[all_cuts_mask]
#event = events.event[all_cuts_mask]
#w = weights[all_cuts_mask]
#w = weight[all_cuts_mask]
#if dense_axis_name == "njets":
# print("\nSTARTPRINT")
# for i,j in enumerate(w):
Expand Down
9 changes: 1 addition & 8 deletions ewkcoffea/modules/yield_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def valvar_op(valvar_1, valvar_2, op):
############## Getting yields from a histo ##############

# Get the yields (nested in the order: year,cat,syst,proc)
def get_yields(histo,sample_dict,blind=True,zero_low_mc = False,systematic_name=None):
def get_yields(histo,sample_dict,blind=True,systematic_name=None):

yld_dict = {}

Expand All @@ -57,13 +57,6 @@ def get_yields(histo,sample_dict,blind=True,zero_low_mc = False,systematic_name=
val = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].values(flow=True)))
var = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].variances(flow=True)))

# Optionally zero out bins that are consistent with zero in the nominal
# Note we do not do this for data
if zero_low_mc and ("data" not in proc_name):
if (np.sqrt(var_n) >= val_n):
val = 0
var = 0

yld_dict[cat_name][syst_name][proc_name] = [val,var]

return yld_dict
Expand Down

0 comments on commit 3639772

Please sign in to comment.