Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] New implementation of low-mc-stats zeroization #76

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions analysis/wwz/make_datacards.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,23 @@ def handle_negatives(in_dict,zero_low_mc):
for proc in in_dict[cat]["nominal"]:
val = in_dict[cat]["nominal"][proc][0]
var = in_dict[cat]["nominal"][proc][1]
if val <= 0:
if zero_low_mc:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and setting variations to 1/1.")

# Kill contributions that are consistent with 0
if zero_low_mc:
if val <= np.sqrt(var):
if "data" in proc:
# This function is only meant to be used on MC, so make sure no data gets through (especially since we never want to zeroize any data bins)
raise Exception("This function does not expect data. Something went wrong.")
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is smaller than MC stat (yield {val}, and mc stat {var**0.5}), replacing with {SMALL} and killing up/down systematic variations.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = 0
for syst in out_dict[cat]:
if syst == "nominal": continue # Already handled this one
out_dict[cat][syst][proc][0] = SMALL
else:

# If not killing contributions that are consistent with 0, just replace 0 and negative contributions with SMALL
else:
if val <= 0:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2
Expand Down Expand Up @@ -604,7 +612,7 @@ def main():
# Get yield dictionary (nested in the order: year,cat,syst,proc)
yld_dict_mc_allyears = {}
for year in sample_names_dict_mc:
yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year],zero_low_mc = zero_low_mc)
yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year])
if do_nuis:
handle_per_year_systs_for_fr(yld_dict_mc_allyears,run,do_jec27)

Expand Down
9 changes: 1 addition & 8 deletions ewkcoffea/modules/yield_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def valvar_op(valvar_1, valvar_2, op):
############## Getting yields from a histo ##############

# Get the yields (nested in the order: year,cat,syst,proc)
def get_yields(histo,sample_dict,blind=True,zero_low_mc = False,systematic_name=None):
def get_yields(histo,sample_dict,blind=True,systematic_name=None):

yld_dict = {}

Expand All @@ -57,13 +57,6 @@ def get_yields(histo,sample_dict,blind=True,zero_low_mc = False,systematic_name=
val = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].values(flow=True)))
var = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].variances(flow=True)))

# Optionally zero out bins that are consistent with zero in the nominal
# Note we do not do this for data
if zero_low_mc and ("data" not in proc_name):
if (np.sqrt(var_n) >= val_n):
val = 0
var = 0

yld_dict[cat_name][syst_name][proc_name] = [val,var]

return yld_dict
Expand Down
Loading