Skip to content

Commit

Permalink
Merge branch 'main' into tc_merge_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthewDittrich committed Dec 20, 2024
2 parents ce98d8d + 959b0d1 commit ddd7b0c
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 25 deletions.
37 changes: 20 additions & 17 deletions analysis/wwz/make_datacards.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,27 +252,28 @@ def handle_per_year_systs_for_fr(in_dict,year_name,do_jec27):
# - Replace the value with SMALL
# - And add |value| to the stat error to be more conservative
# - Shift the up/down variations to be centered around SMALL (does not touch stat uncertainty on up/down)
def handle_negatives(in_dict):
def handle_negatives(in_dict,zero_low_mc):
out_dict = copy.deepcopy(in_dict)
for cat in in_dict:
for proc in in_dict[cat]["nominal"]:
val = in_dict[cat]["nominal"][proc][0]
var = in_dict[cat]["nominal"][proc][1]
if val <= 0:
#!!!OLD METHOD FOR HANDLING LOW YIELD BINS!!!
#print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.")
#out_dict[cat]["nominal"][proc][0] = SMALL
#out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2
#for syst in out_dict[cat]:
# if syst == "nominal": continue # Already handled this one
# syst_var_orig = out_dict[cat][syst][proc][0] # Dont bother messsing with mc stat error on the syst variation
#out_dict[cat][syst][proc][0] = (syst_var_orig - val) + SMALL # Center around SMALL
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and setting variations to 1/1.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = 0
for syst in out_dict[cat]:
if syst == "nominal": continue # Already handled this one
out_dict[cat][syst][proc][0] = SMALL
if zero_low_mc:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and setting variations to 1/1.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = 0
for syst in out_dict[cat]:
if syst == "nominal": continue # Already handled this one
out_dict[cat][syst][proc][0] = SMALL
else:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2
for syst in out_dict[cat]:
if syst == "nominal": continue # Already handled this one
syst_var_orig = out_dict[cat][syst][proc][0] # Dont bother messsing with mc stat error on the syst variation
out_dict[cat][syst][proc][0] = (syst_var_orig - val) + SMALL # Center around SMALL

return out_dict

Expand Down Expand Up @@ -556,6 +557,7 @@ def main():
parser.add_argument("--bdt",action="store_true",help="Use BDT SR bins")
parser.add_argument("--jec-do-twentyseven",action="store_true",help="Use the 27 JEC uncertainty variations :(")
parser.add_argument("--unblind",action="store_true",help="If set, use real data, otherwise use asimov data")
parser.add_argument("--zero-low-mc",action="store_true",help="If set, mc processes that are consistent with zero will be set to zero")
parser.add_argument('-u', "--run", default='run2', help = "Which years to process", choices=["run2","run3","y22","y23"])

args = parser.parse_args()
Expand All @@ -566,6 +568,7 @@ def main():
do_jec27= args.jec_do_twentyseven
use_bdt_sr = args.bdt
unblind = args.unblind
zero_low_mc = args.zero_low_mc
run = args.run

# Check args
Expand Down Expand Up @@ -601,7 +604,7 @@ def main():
# Get yield dictionary (nested in the order: year,cat,syst,proc)
yld_dict_mc_allyears = {}
for year in sample_names_dict_mc:
yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year])
yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year],zero_low_mc = zero_low_mc)
if do_nuis:
handle_per_year_systs_for_fr(yld_dict_mc_allyears,run,do_jec27)

Expand Down Expand Up @@ -636,7 +639,7 @@ def main():


# Get rid of negative yields (and recenter syst variations around SMALL), should happen before computing kappas
yld_dict_mc = handle_negatives(yld_dict_mc)
yld_dict_mc = handle_negatives(yld_dict_mc,zero_low_mc)

# Get the syst ratios to nominal (i.e. kappas)
kappa_dict = None
Expand Down
10 changes: 7 additions & 3 deletions analysis/wwz/make_datacards_wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ R3_PKL="r3_wwz_histos_withSyst.pkl.gz"

rm cards_wwz4l/*

# For BDT results
python make_datacards.py histos/$R2_PKL -u run2 -s --bdt
python make_datacards.py histos/$R3_PKL -u run3 -s --bdt
# For unblind BDT results
python make_datacards.py histos/$R2_PKL -u run2 -s --bdt --unblind --zero-low-mc
python make_datacards.py histos/$R3_PKL -u run3 -s --bdt --unblind --zero-low-mc

# For blind BDT results
#python make_datacards.py histos/$R2_PKL -u run2 -s --bdt --zero-low-mc
#python make_datacards.py histos/$R3_PKL -u run3 -s --bdt --zero-low-mc

# For Cut Based results
#python make_datacards.py histos/$R2_PKL -u run2 -s
Expand Down
12 changes: 7 additions & 5 deletions ewkcoffea/modules/yield_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def valvar_op(valvar_1, valvar_2, op):
############## Getting yields from a histo ##############

# Get the yields (nested in the order: year,cat,syst,proc)
def get_yields(histo,sample_dict,blind=True,systematic_name=None):
def get_yields(histo,sample_dict,blind=True,zero_low_mc = False,systematic_name=None):

yld_dict = {}

Expand All @@ -57,10 +57,12 @@ def get_yields(histo,sample_dict,blind=True,systematic_name=None):
val = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].values(flow=True)))
var = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].variances(flow=True)))

# Zero out bins that are consistent with zero in the nominal
if (np.sqrt(var_n) >= val_n):
val = 0
var = 0
# Optionally zero out bins that are consistent with zero in the nominal
# Note we do not do this for data
if zero_low_mc and ("data" not in proc_name):
if (np.sqrt(var_n) >= val_n):
val = 0
var = 0

yld_dict[cat_name][syst_name][proc_name] = [val,var]

Expand Down

0 comments on commit ddd7b0c

Please sign in to comment.