Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unblinding Updates #74

Merged
merged 13 commits into from
Dec 19, 2024
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ jobs:
run: |
mkdir dir_for_topcoffea
cd dir_for_topcoffea
git clone https://github.com/TopEFT/topcoffea.git
git clone https://github.com/TopEFT/topcoffea.git -b pin_dec06_2024
cd topcoffea
conda run -n coffea-env pip install -e .
cd ../..
Expand Down
35 changes: 23 additions & 12 deletions analysis/wwz/make_datacards.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,20 +252,28 @@ def handle_per_year_systs_for_fr(in_dict,year_name,do_jec27):
# - Replace the value with SMALL
# - And add |value| to the stat error to be more conservative
# - Shift the up/down variations to be centered around SMALL (does not touch stat uncertainty on up/down)
def handle_negatives(in_dict):
def handle_negatives(in_dict,zero_low_mc):
out_dict = copy.deepcopy(in_dict)
for cat in in_dict:
for proc in in_dict[cat]["nominal"]:
val = in_dict[cat]["nominal"][proc][0]
var = in_dict[cat]["nominal"][proc][1]
if val <= 0:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2
for syst in out_dict[cat]:
if syst == "nominal": continue # Already handled this one
syst_var_orig = out_dict[cat][syst][proc][0] # Don't bother messing with mc stat error on the syst variation
out_dict[cat][syst][proc][0] = (syst_var_orig - val) + SMALL # Center around SMALL
if zero_low_mc:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and setting variations to 1/1.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = 0
for syst in out_dict[cat]:
if syst == "nominal": continue # Already handled this one
out_dict[cat][syst][proc][0] = SMALL
else:
print(f"WARNING: Process \"{proc}\" in cat \"{cat}\" is negative ({val}), replacing with {SMALL} and shifting up/down systematic variations accordingly.")
out_dict[cat]["nominal"][proc][0] = SMALL
out_dict[cat]["nominal"][proc][1] = (abs(val) + np.sqrt(var))**2
for syst in out_dict[cat]:
if syst == "nominal": continue # Already handled this one
syst_var_orig = out_dict[cat][syst][proc][0] # Don't bother messing with mc stat error on the syst variation
out_dict[cat][syst][proc][0] = (syst_var_orig - val) + SMALL # Center around SMALL

return out_dict

Expand Down Expand Up @@ -515,7 +523,7 @@ def un_correlate_mur_muf(in_dict):
for syst_name,val in in_dict.items():

# For muR and muF (and, in the updated list, ps_isr and ps_fsr), we need to decorrelate across procs
if syst_name in ["QCDscale_ren","QCDscale_fac"]:
if syst_name in ["QCDscale_ren","QCDscale_fac", "ps_isr","ps_fsr"]:
# We'll need a separate copy of the systematic (named "{syst_name}_{proc}") for each proc in the proc list
for proc_of_interest in sg.PROC_LST:
new_syst_name = f"{syst_name}_{proc_of_interest}"
Expand All @@ -534,6 +542,7 @@ def un_correlate_mur_muf(in_dict):
return out_dict



#####################################
########### Main function ###########

Expand All @@ -548,6 +557,7 @@ def main():
parser.add_argument("--bdt",action="store_true",help="Use BDT SR bins")
parser.add_argument("--jec-do-twentyseven",action="store_true",help="Use the 27 JEC uncertainty variations :(")
parser.add_argument("--unblind",action="store_true",help="If set, use real data, otherwise use asimov data")
parser.add_argument("--zero-low-mc",action="store_true",help="If set, mc processes that are consistent with zero will be set to zero")
parser.add_argument('-u', "--run", default='run2', help = "Which years to process", choices=["run2","run3","y22","y23"])

args = parser.parse_args()
Expand All @@ -558,6 +568,7 @@ def main():
do_jec27= args.jec_do_twentyseven
use_bdt_sr = args.bdt
unblind = args.unblind
zero_low_mc = args.zero_low_mc
run = args.run

# Check args
Expand Down Expand Up @@ -593,12 +604,12 @@ def main():
# Get yield dictionary (nested in the order: year,cat,syst,proc)
yld_dict_mc_allyears = {}
for year in sample_names_dict_mc:
yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year])
yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year],zero_low_mc = zero_low_mc)
if do_nuis:
handle_per_year_systs_for_fr(yld_dict_mc_allyears,run,do_jec27)

yld_dict_mc = yld_dict_mc_allyears["FR"]
yld_dict_data = yt.get_yields(histo,sample_names_dict_data["FR"])
yld_dict_data = yt.get_yields(histo,sample_names_dict_data["FR"],blind = not unblind)

# Scale yield for any processes (e.g. for testing impacts of small backgrounds)
scale_dict = {"WZ":1.0}
Expand Down Expand Up @@ -628,7 +639,7 @@ def main():


# Get rid of negative yields (and recenter syst variations around SMALL), should happen before computing kappas
yld_dict_mc = handle_negatives(yld_dict_mc)
yld_dict_mc = handle_negatives(yld_dict_mc,zero_low_mc)

# Get the syst ratios to nominal (i.e. kappas)
kappa_dict = None
Expand Down
10 changes: 7 additions & 3 deletions analysis/wwz/make_datacards_wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ R3_PKL="r3_wwz_histos_withSyst.pkl.gz"

rm cards_wwz4l/*

# For BDT results
python make_datacards.py histos/$R2_PKL -u run2 -s --bdt
python make_datacards.py histos/$R3_PKL -u run3 -s --bdt
# For unblind BDT results
python make_datacards.py histos/$R2_PKL -u run2 -s --bdt --unblind --zero-low-mc
python make_datacards.py histos/$R3_PKL -u run3 -s --bdt --unblind --zero-low-mc

# For blind BDT results
#python make_datacards.py histos/$R2_PKL -u run2 -s --bdt --zero-low-mc
#python make_datacards.py histos/$R3_PKL -u run3 -s --bdt --zero-low-mc

# For Cut Based results
#python make_datacards.py histos/$R2_PKL -u run2 -s
Expand Down
8 changes: 4 additions & 4 deletions analysis/wwz/yld_dicts_for_comp.py

Large diffs are not rendered by default.

13 changes: 12 additions & 1 deletion ewkcoffea/modules/yield_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def valvar_op(valvar_1, valvar_2, op):
############## Getting yields from a histo ##############

# Get the yields (nested in the order: year,cat,syst,proc)
def get_yields(histo,sample_dict,blind=True,systematic_name=None):
def get_yields(histo,sample_dict,blind=True,zero_low_mc = False,systematic_name=None):

yld_dict = {}

Expand All @@ -51,8 +51,19 @@ def get_yields(histo,sample_dict,blind=True,systematic_name=None):
# If this is data and we're not in a CR category, put placeholder numbers for now
yld_dict[cat_name][syst_name][proc_name] = [-999,-999]
else:
val_n = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":"nominal" }].values(flow=True)))
var_n = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":"nominal" }].variances(flow=True)))

val = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].values(flow=True)))
var = sum(sum(histo[{"category":cat_name,"process":sample_dict[proc_name],"systematic":syst_name }].variances(flow=True)))

# Optionally zero out yields whose nominal value is consistent with zero, i.e. sqrt(var_n) >= val_n (applied to every systematic variation, based on the nominal)
# Note we do not do this for data
if zero_low_mc and ("data" not in proc_name):
if (np.sqrt(var_n) >= val_n):
val = 0
var = 0

yld_dict[cat_name][syst_name][proc_name] = [val,var]

return yld_dict
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0033,
"xsec": 0.00312,
"year": "2022EE",
"treeName": "Events",
"histAxisName": "2022EE_GluGluZHTo2WTo2L2Nu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0195,
"xsec": 0.0217,
"year": "2022EE",
"treeName": "Events",
"histAxisName": "2022EE_qqToZHTo2WTo2L2Nu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0033,
"xsec": 0.00312,
"year": "2022",
"treeName": "Events",
"histAxisName": "2022_GluGluZHTo2WTo2L2Nu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0195,
"xsec": 0.0217,
"year": "2022",
"treeName": "Events",
"histAxisName": "2022_qqToZHTo2WTo2L2Nu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0033,
"xsec": 0.00312,
"year": "2023BPix",
"treeName": "Events",
"histAxisName": "2023BPix_GluGluZHTo2WTo2L2Nu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0195,
"xsec": 0.0217,
"year": "2023BPix",
"treeName": "Events",
"histAxisName": "2023BPix_qqToZHTo2WTo2L2Nu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0033,
"xsec": 0.00312,
"year": "2023",
"treeName": "Events",
"histAxisName": "2023_GluGluZHTo2WTo2L2Nu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0195,
"xsec": 0.0217,
"year": "2023",
"treeName": "Events",
"histAxisName": "2023_qqToZHTo2WTo2L2Nu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.00275,
"xsec": 0.00282,
"year": "2016APV",
"treeName": "Events",
"histAxisName": "UL16APV_GluGluZH",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0018639,
"xsec": 0.00205,
"year": "2016APV",
"treeName": "Events",
"histAxisName": "UL16APV_qqToZHToZTo2L",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.00275,
"xsec": 0.00282,
"year": "2016",
"treeName": "Events",
"histAxisName": "UL16_GluGluZH",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0018639,
"xsec": 0.00205,
"year": "2016",
"treeName": "Events",
"histAxisName": "UL16_qqToZHToZTo2L",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.00275,
"xsec": 0.00282,
"year": "2017",
"treeName": "Events",
"histAxisName": "UL17_GluGluZH",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0018639,
"xsec": 0.00205,
"year": "2017",
"treeName": "Events",
"histAxisName": "UL17_qqToZHToZTo2L",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.00275,
"xsec": 0.00282,
"year": "2018",
"treeName": "Events",
"histAxisName": "UL18_GluGluZH",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"xsec": 0.0018639,
"xsec": 0.00205,
"year": "2018",
"treeName": "Events",
"histAxisName": "UL18_qqToZHToZTo2L",
Expand Down
Loading