From 8aa25da6b14ce8d211e1cba4ca5daa33b783fb42 Mon Sep 17 00:00:00 2001 From: Matthew Dittrich Date: Wed, 13 Nov 2024 09:27:01 -0800 Subject: [PATCH] Very Rough draft of making 2D eta-phi plots --- analysis/wwz/etaphi_plotter.py | 141 ++++++++++ analysis/wwz/for_jec_11_var.py | 93 +++++++ analysis/wwz/for_jec_27_var.py | 17 +- analysis/wwz/get_wwz_yields.py | 46 ++-- analysis/wwz/make_datacards.py | 55 +++- analysis/wwz/wwz4l.py | 431 ++++++++++++++++++------------- ewkcoffea/modules/corrections.py | 37 ++- ewkcoffea/params/rate_systs.json | 8 + 8 files changed, 612 insertions(+), 216 deletions(-) create mode 100644 analysis/wwz/etaphi_plotter.py create mode 100644 analysis/wwz/for_jec_11_var.py diff --git a/analysis/wwz/etaphi_plotter.py b/analysis/wwz/etaphi_plotter.py new file mode 100644 index 00000000..d8056363 --- /dev/null +++ b/analysis/wwz/etaphi_plotter.py @@ -0,0 +1,141 @@ +import os +import pickle +import gzip +import argparse +import matplotlib.pyplot as plt +import numpy as np +import awkward as ak + +from coffea import lookup_tools + +# This script is an example of how to dump some info from the btag eff histos (does not actually currently make a plot) +# Example usage: +# python btageff_plotter.py ../../ewkcoffea/data/btag_eff/btag_eff_ttZ_srpresel.pkl.gz -u run2 + +import numpy as np +import matplotlib.pyplot as plt + +def make_2d_plot(hist, title, nbins=10): + # Get the original x and y range + x_min, x_max = ak.flatten(hist.axes.edges[0])[0], ak.flatten(hist.axes.edges[0])[-1] + y_min, y_max = ak.flatten(hist.axes.edges[1])[0], ak.flatten(hist.axes.edges[1])[-1] + + # Define new bin edges using the same range but with fewer bins + xedges = np.linspace(x_min, x_max, nbins + 1) + yedges = np.linspace(y_min, y_max, nbins + 1) + + # Aggregate the original bin values to match the new binning + bin_values = hist.values() + new_bin_values = np.zeros((nbins, nbins)) + x_bin_width = bin_values.shape[0] // nbins + y_bin_width = bin_values.shape[1] // nbins + + for i in range(nbins): + for j in range(nbins): + new_bin_values[i, j] = bin_values[i*x_bin_width:(i+1)*x_bin_width, j*y_bin_width:(j+1)*y_bin_width].sum() + + # Plot with the new binning + fig, ax = plt.subplots() + X, Y = np.meshgrid(xedges, yedges) + mesh = ax.pcolormesh(X, Y, new_bin_values.T, cmap='viridis', shading='auto') + + # Add color bar with customized range + cbar = plt.colorbar(mesh, ax=ax, extend='both') + cbar.set_label('Jet Counts') + + # Set axis labels and title + ax.set_xlabel('ETA') + ax.set_ylabel('PHI') + ax.set_title(title) + + # Annotate each bin with its value + #for i in range(nbins): + # for j in range(nbins): + # bin_value = f'{new_bin_values[i, j]:.2f}' + # x = (xedges[j] + xedges[j+1]) / 2 + # y = (yedges[i] + yedges[i+1]) / 2 + # ax.text(x, y, bin_value, ha='center', va='center', color='white', fontweight='bold', fontsize=8, + # bbox=dict(facecolor='black', alpha=0.5, edgecolor='none')) + + return fig + + + +#def make_2d_plot(hist, title): +# xedges = ak.flatten(hist.axes.edges[0]) +# yedges = ak.flatten(hist.axes.edges[1]) +# bin_values = hist.values() +# +# # Print bin values for debugging +# #print("Bin values:\n", bin_values) +# +# fig, ax = plt.subplots() +# X, Y = np.meshgrid(xedges, yedges) +# mesh = ax.pcolormesh(X, Y, bin_values.T, cmap='viridis', shading='auto') +# +# # Add color bar with customized range +# cbar = plt.colorbar(mesh, ax=ax, extend='both') # extend='both' for color bar extensions +# cbar.set_label('Jet Counts') +# #cbar.set_ticks(np.linspace(vmin, vmax, num=5)) # Example of setting ticks +# +# # Set color bar limits directly when creating it +# mesh.set_clim(vmin=0, vmax=3) +# +# # Set axis labels +# ax.set_xlabel('ETA') +# ax.set_ylabel('PHI') +# ax.set_title(title) +# +# # Annotate each bin with its value +# #for i in range(len(yedges)-1): +# # for j in range(len(xedges)-1): +# # bin_value = f'{bin_values.T[i, j]:.2f}' +# # x = (xedges[j] + xedges[j+1]) / 2 +# # y = (yedges[i] + yedges[i+1]) / 2 +# # print(f"Annotating bin at ({x}, {y}) with value {bin_value}") +# # ax.text(x, y, bin_value, ha='center', va='center', color='white', fontweight='bold', fontsize=8, +# # bbox=dict(facecolor='black', alpha=0.5, edgecolor='none')) +# +# return fig + + +def main(): + + # Set up the command line parser + parser = argparse.ArgumentParser() + parser.add_argument("pkl_file_path", help = "The path to the pkl file") + parser.add_argument('-u', "--ul-year", default='run2', help = "Which year to process", choices=["UL18","UL17","UL16","UL16APV"]) + parser.add_argument('-p', "--make-plots", action='store_true', help = "Make plots from the pkl file") + args = parser.parse_args() + + year = args.ul_year + + + # Get the counts from the input hiso + histo = pickle.load(gzip.open(args.pkl_file_path))["etaphi_all"] + + cat_list = [ +# "all_events", + "cr_4l_sf", +# "cr_4l_btag_of", +# "cr_4l_btag_sf_offZ_met80" + ] + + for cat in cat_list: + print(f"\n{cat}") + + histo_proc = histo[{"category":cat}] + + if args.make_plots: + save_dir = "etaphi_plots" + os.makedirs(save_dir, exist_ok=True) + + fig = make_2d_plot(histo_proc,f"Year:{year} Category:{cat}") + save_path = os.path.join(save_dir, f"{year}_{cat}_etaphi.png") + fig.savefig(save_path) + plt.close(fig) + plt.show() + + +if __name__ == "__main__": + main() diff --git a/analysis/wwz/for_jec_11_var.py b/analysis/wwz/for_jec_11_var.py new file mode 100644 index 00000000..90bb42f8 --- /dev/null +++ b/analysis/wwz/for_jec_11_var.py @@ -0,0 +1,93 @@ + +# This is a file for things related to the 27 JEC variations +# - For things that would be too long and messy to keep in our regular scripts, but want to keep them around for reference + +SYSTS_SPECIAL = { + "UL16APV" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "Regrouped_Absolute_2016APV_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "Regrouped_HF_2016APV_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "Regrouped_EC2_2016APV_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "Regrouped_RelativeSample_2016APV_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "Regrouped_BBEC1_2016APV_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "CMS_res_j_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + }, + "UL16" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + "Regrouped_Absolute_2016_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": []}, + "Regrouped_HF_2016_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": []}, + "Regrouped_EC2_2016_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": []}, + "Regrouped_RelativeSample_2016_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": []}, + "Regrouped_BBEC1_2016_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": []}, + "CMS_res_j_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + }, + "UL17" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + "Regrouped_Absolute_2017_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": []}, + "Regrouped_HF_2017_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": []}, + "Regrouped_EC2_2017_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": []}, + "Regrouped_RelativeSample_2017_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": []}, + "Regrouped_BBEC1_2017_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": []}, + "CMS_res_j_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + }, + "UL18" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "Regrouped_Absolute_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": []}, + "Regrouped_HF_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": []}, + "Regrouped_EC2_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": []}, + "Regrouped_RelativeSample_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": []}, + "Regrouped_BBEC1_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_res_j_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + }, + "run2" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + "CMS_res_j_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + "CMS_scale_met_unclustered_energy_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + "Regrouped_Absolute_2018_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + "Regrouped_HF_2018_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + "Regrouped_EC2_2018_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + "Regrouped_RelativeSample_2018_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + "Regrouped_BBEC1_2018_uncorrelated" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, + + "CMS_btag_fixedWP_incl_light_uncorrelated_2016" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2016" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + "CMS_res_j_2016" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + "CMS_scale_met_unclustered_energy_2016" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + "Regrouped_Absolute_2018_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + "Regrouped_HF_2018_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + "Regrouped_EC2_2018_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + "Regrouped_RelativeSample_2018_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + "Regrouped_BBEC1_2018_uncorrelated" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18"]}, + + "CMS_btag_fixedWP_incl_light_uncorrelated_2017" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2017" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + "CMS_res_j_2017" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + "CMS_scale_met_unclustered_energy_2017" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + "Regrouped_Absolute_2018_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + "Regrouped_HF_2018_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + "Regrouped_EC2_2018_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + "Regrouped_RelativeSample_2018_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + "Regrouped_BBEC1_2018_uncorrelated" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18"]}, + + "CMS_btag_fixedWP_incl_light_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + "CMS_res_j_2018" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + "CMS_scale_met_unclustered_energy_2018" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + "Regrouped_Absolute_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + "Regrouped_HF_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + "Regrouped_EC2_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + "Regrouped_RelativeSample_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + "Regrouped_BBEC1_2018_uncorrelated" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, + + }, + +} diff --git a/analysis/wwz/for_jec_27_var.py b/analysis/wwz/for_jec_27_var.py index e274829f..006d2fe8 100644 --- a/analysis/wwz/for_jec_27_var.py +++ b/analysis/wwz/for_jec_27_var.py @@ -131,7 +131,22 @@ ] SYSTS_SPECIAL = { - + "UL18" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "AbsoluteStat_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "RelativeJEREC1_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "RelativeJEREC2_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "RelativePtEC1_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "RelativePtEC2_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "TimePtEta_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "RelativeSample_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "RelativeStatEC_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "RelativeStatFSR_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "RelativeStatHF_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_res_j_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + }, "run2" : { "btagSFlight_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, "btagSFbc_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18"]}, diff --git a/analysis/wwz/get_wwz_yields.py b/analysis/wwz/get_wwz_yields.py index 40ac425c..c3d5aa5c 100644 --- a/analysis/wwz/get_wwz_yields.py +++ b/analysis/wwz/get_wwz_yields.py @@ -518,24 +518,31 @@ def make_syst_fig(histo_mc,mc_up_arr,mc_do_arr,syst,histo_data=None,title="test" def make_syst_plots(histo_dict,grouping_mc,grouping_data,save_dir_path,year): for var_name in histo_dict.keys(): - #print(f"\n{var_name}") + print(f"\n{var_name}") #if var_name != "njets": continue - if var_name not in TMP_VAR_LST: continue + #if var_name not in TMP_VAR_LST: continue + if var_name not in ["bdt_of_bin","bdt_sf_bin","njets","nleps"]: continue + #if (var_name not in BDT_INPUT_LST) and (var_name not in BDT_BINSUMMARY_LST): continue histo = histo_dict[var_name] cat_lst = [ - "sr_4l_sf_A", - "sr_4l_sf_B", - "sr_4l_sf_C", - "sr_4l_of_1", - "sr_4l_of_2", - "sr_4l_of_3", - "sr_4l_of_4", - "cr_4l_sf", - "cr_4l_btag_sf_offZ_met80", - "cr_4l_btag_of", - "sr_4l_of_incl", - "sr_4l_sf_incl", + #"sr_4l_sf_A", + #"sr_4l_sf_B", + #"sr_4l_sf_C", + #"sr_4l_of_1", + #"sr_4l_of_2", + #"sr_4l_of_3", + #"sr_4l_of_4", + #"cr_4l_sf", + #"cr_4l_btag_sf_offZ_met80", + #"cr_4l_btag_of", + #"sr_4l_of_incl", + #"sr_4l_sf_incl", + "cr_4l_btag_of", + "cr_4l_btag_sf_offZ_met80", + "cr_4l_sf", + "sr_4l_bdt_sf_trn", + "sr_4l_bdt_of_trn" ] # Rebin if continous variable @@ -563,8 +570,8 @@ def make_syst_plots(histo_dict,grouping_mc,grouping_data,save_dir_path,year): data_nom = merge_overflow(histo_grouped_data[{"systematic":"nominal"}]) for syst in syst_var_lst: - if syst not in jecref.JERC_LST: continue - #if "btag" not in syst: continue + #if syst not in jecref.JERC_LST: continue + if "Regrouped" not in syst: continue #if "uncorrelated" not in syst: continue #if "lepSF" not in syst: continue #if "PreFiring" not in syst: continue @@ -586,7 +593,8 @@ def make_syst_plots(histo_dict,grouping_mc,grouping_data,save_dir_path,year): if year == "all": blacklist_years = [] skip = False for y in blacklist_years: - if syst.endswith(y): + #if syst.endswith(y): + if y in syst: skip = True if skip: continue @@ -955,8 +963,8 @@ def main(): # Make plots if args.make_plots: - make_plots(histo_dict,sample_dict_mc,sample_dict_data,save_dir_path=out_path,apply_nsf_to_cr=False) - #make_syst_plots(histo_dict,sample_dict_mc,sample_dict_data,out_path,args.ul_year) # Check on individual systematics + #make_plots(histo_dict,sample_dict_mc,sample_dict_data,save_dir_path=out_path,apply_nsf_to_cr=False) + make_syst_plots(histo_dict,sample_dict_mc,sample_dict_data,out_path,args.ul_year) # Check on individual systematics #make_sr_comb_plot(histo_dict,sample_dict_mc,sample_dict_data,args.ul_year,ana_type="cb") # Make plot of all SR yields in one plot diff --git a/analysis/wwz/make_datacards.py b/analysis/wwz/make_datacards.py index 55132013..0399c417 100644 --- a/analysis/wwz/make_datacards.py +++ b/analysis/wwz/make_datacards.py @@ -12,6 +12,7 @@ import ewkcoffea.modules.yield_tools as yt import for_jec_27_var as jecref +import for_jec_11_var as jecref_11 SMALL = 0.000001 @@ -22,7 +23,7 @@ OUR_TAG = "CMS_SMP24015" # What each recognized year grouping consists of -ALL_YEARS_LST = ["UL16","UL16APV","UL17","UL18", "2022","2022EE", "2023","2023BPix"] +ALL_YEARS_LST = ["UL16","UL16APV","UL17","UL18", "2022","2022EE", "2023","2023BPix","2016","2016APV","2017","2018"] # Systs that are not correlated across years @@ -51,6 +52,34 @@ "CMS_scale_met_unclustered_energy_2018" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17"]}, }, + "UL16APV" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "CMS_res_j_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "CMS_scale_j_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": []}, + }, + "UL16" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + "CMS_res_j_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + "CMS_scale_j_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2016" : {"yr_rel":"UL16", "yr_notrel": []}, + }, + "UL17" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + "CMS_res_j_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + "CMS_scale_j_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2017" : {"yr_rel":"UL17", "yr_notrel": []}, + }, + "UL18" : { + "CMS_btag_fixedWP_incl_light_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_btag_fixedWP_comb_bc_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_res_j_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_scale_j_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + "CMS_scale_met_unclustered_energy_2018" : {"yr_rel":"UL18", "yr_notrel": []}, + }, "run3" : { "CMS_btag_fixedWP_comb_bc_uncorrelated_2022" : {"yr_rel":"2022", "yr_notrel": ["2022EE","2023","2023BPix"]}, @@ -216,9 +245,11 @@ def make_ch_card(ch,proc_order,year_name,ch_ylds,ch_kappas=None,ch_gmn=None,extr # - Because of how we fill in the processor, the yields for per year systs come _only_ from that year # - So this function adds the nominal yields from the other three years to the up/down variation for the relevant year # - Note the in_dict is modifed in place (we do not return a copy of the dict) -def handle_per_year_systs_for_fr(in_dict,year_name,do_jec27): +def handle_per_year_systs_for_fr(in_dict,year_name,do_jec27,do_jec11): if do_jec27: systs_special = jecref.SYSTS_SPECIAL[year_name] + elif do_jec11: + systs_special = jecref_11.SYSTS_SPECIAL[year_name] else: systs_special = SYSTS_SPECIAL[year_name] for cat in in_dict["FR"].keys(): @@ -348,6 +379,10 @@ def get_syst_base_name_lst(in_lst): # E.g. if the pkl file is all r3, but we're only making a 22 datacard, skip btagSFbc_uncorrelated_2023BPix variation # Do this by checking if the syst name ends with a year, and then if that year is in our list for this card if (sys.split("_")[-1] in ALL_YEARS_LST) and (sys.split("_")[-1] not in yrs_lst): continue + #sys_year_list = [element for element in sys.split("_") if element in ALL_YEARS_LST] + + #if any (element in ALL_YEARS_LST for element in sys.split("_")): + kappa_dict[cat][sys] = {} for proc in in_dict_mc[cat]["nominal"]: @@ -493,8 +528,9 @@ def main(): parser.add_argument("--do-tf",action="store_true",help="Do the TF data-driven background estimation") parser.add_argument("--bdt",action="store_true",help="Use BDT SR bins") parser.add_argument("--jec-do-twentyseven",action="store_true",help="Use the 27 JEC uncertainty variations :(") + parser.add_argument("--jec-do-eleven",action="store_true",help="Use the 11 JEC uncertainty variations :(") parser.add_argument("--unblind",action="store_true",help="If set, use real data, otherwise use asimov data") - parser.add_argument('-u', "--run", default='run2', help = "Which years to process", choices=["run2","run3","y22","y23"]) + parser.add_argument('-u', "--run", default='run2', help = "Which years to process", choices=["run2","run3","y22","y23","UL18","UL16APV","UL16","UL17"]) args = parser.parse_args() in_file = args.in_file_name @@ -502,6 +538,7 @@ def main(): do_nuis = args.do_nuisance do_tf = args.do_tf do_jec27= args.jec_do_twentyseven + do_jec11= args.jec_do_eleven use_bdt_sr = args.bdt unblind = args.unblind run = args.run @@ -512,7 +549,7 @@ def main(): os.makedirs(out_dir) # Set a tag for center of mass energy - if run in ["run2"]: + if run in ["run2","UL18","UL16APV","UL16","UL17"]: com_tag = "13TeV" elif run in ["run3","y22","y23"]: com_tag = "13p6TeV" @@ -521,6 +558,10 @@ def main(): # Set list of years from the run name if run == "run2": yrs_lst = ["UL16APV","UL16","UL17","UL18"] + elif run == "UL16APV": yrs_lst = ["UL16APV"] + elif run == "UL16": yrs_lst = ["UL16"] + elif run == "UL17": yrs_lst = ["UL17"] + elif run == "UL18": yrs_lst = ["UL18"] elif run == "run3": yrs_lst = ["2022","2022EE","2023","2023BPix"] elif run == "y22" : yrs_lst = ["2022","2022EE"] elif run == "y23" : yrs_lst = ["2023","2023BPix"] @@ -541,7 +582,7 @@ def main(): for year in sample_names_dict_mc: yld_dict_mc_allyears[year] = yt.get_yields(histo,sample_names_dict_mc[year]) if do_nuis: - handle_per_year_systs_for_fr(yld_dict_mc_allyears,run,do_jec27) + handle_per_year_systs_for_fr(yld_dict_mc_allyears,run,do_jec27,do_jec11) yld_dict_mc = yld_dict_mc_allyears["FR"] yld_dict_data = yt.get_yields(histo,sample_names_dict_data["FR"]) @@ -596,7 +637,7 @@ def main(): cat_lst_cr = ["cr_4l_btag_of_1b", "cr_4l_btag_of_2b", "cr_4l_btag_sf_offZ_met80_1b", "cr_4l_btag_sf_offZ_met80_2b","cr_4l_sf"] cat_lst_sr = sg.CAT_LST_CB if use_bdt_sr: - if run in ["run2"]: + if run in ["run2","UL16APV","UL16","UL17","UL18"]: cat_lst_sr = sg.CAT_LST_BDT elif run in ["run3", "y22", "y23"]: cat_lst_sr = sg.CAT_LST_BDT_COARSE @@ -610,7 +651,7 @@ def main(): for ch in cat_lst: # Use real data in CRs - if ch in cat_lst_cr: unblind = True + #if ch in cat_lst_cr: unblind = True # Get just the info we want to put in the card in str form rate_for_dc_ch = get_rate_for_dc(yld_dict_mc,yld_dict_data,ch,unblind) diff --git a/analysis/wwz/wwz4l.py b/analysis/wwz/wwz4l.py index 6e9484c7..8f586007 100644 --- a/analysis/wwz/wwz4l.py +++ b/analysis/wwz/wwz4l.py @@ -56,111 +56,124 @@ def __init__(self, samples, wc_names_lst=[], hist_lst=None, ecut_threshold=None, self._dtype = dtype # Create the dense axes for the histograms - self._dense_axes_dict = { - "mt2" : axis.Regular(180, 0, 250, name="mt2", label="mt2"), - "met" : axis.Regular(180, 0, 300, name="met", label="met"), - "metphi": axis.Regular(180, -3.1416, 3.1416, name="metphi", label="met phi"), - "ptl4" : axis.Regular(180, 0, 500, name="ptl4", label="ptl4"), - "scalarptsum_lep" : axis.Regular(180, 0, 600, name="scalarptsum_lep", label="S_T"), - "scalarptsum_lepmet" : axis.Regular(180, 0, 600, name="scalarptsum_lepmet", label="S_T + metpt"), - "scalarptsum_lepmetjet" : axis.Regular(180, 0, 1100, name="scalarptsum_lepmetjet", label="S_T + metpt + H_T"), - "scalarptsum_jet" : axis.Regular(180, 0, 500, name="scalarptsum_jet", label="H_T"), - "mll_01": axis.Regular(180, 0, 200, name="mll_01", label="mll_l0_l1"), - "mllll": axis.Regular(180, 0, 600, name="mllll", label="mllll"), - "l0pt" : axis.Regular(180, 0, 500, name="l0pt", label="l0pt"), - "j0pt" : axis.Regular(180, 0, 500, name="j0pt", label="j0pt"), - - "abs_pdgid_sum" : axis.Regular(15, 40, 55, name="abs_pdgid_sum", label="Sum of abs pdgId for all 4 lep"), - - "w_lep0_pt" : axis.Regular(180, 0, 300, name="w_lep0_pt", label="Leading W lep pt"), - "w_lep1_pt" : axis.Regular(180, 0, 300, name="w_lep1_pt", label="Subleading W lep pt"), - "z_lep0_pt" : axis.Regular(180, 0, 300, name="z_lep0_pt", label="Leading Z lep pt"), - "z_lep1_pt" : axis.Regular(180, 0, 300, name="z_lep1_pt", label="Subleading Z lep pt"), - "w_lep0_eta" : axis.Regular(180, -3, 3, name="w_lep0_eta", label="Leading W lep eta"), - "w_lep1_eta" : axis.Regular(180, -3, 3, name="w_lep1_eta", label="Subleading W lep eta"), - "z_lep0_eta" : axis.Regular(180, -3, 3, name="z_lep0_eta", label="Leading Z lep eta"), - "z_lep1_eta" : axis.Regular(180, -3, 3, name="z_lep1_eta", label="Subleading Z lep eta"), - "w_lep0_phi" : axis.Regular(180, -3.1416, 3.1416, name="w_lep0_phi", label="Leading W lep phi"), - "w_lep1_phi" : axis.Regular(180, -3.1416, 3.1416, name="w_lep1_phi", label="Subleading W lep phi"), - "z_lep0_phi" : axis.Regular(180, -3.1416, 3.1416, name="z_lep0_phi", label="Leading Z lep phi"), - "z_lep1_phi" : axis.Regular(180, -3.1416, 3.1416, name="z_lep1_phi", label="Subleading Z lep phi"), - "mll_wl0_wl1" : axis.Regular(180, 0, 350, name="mll_wl0_wl1", label="mll(W lep0, W lep1)"), - "mll_zl0_zl1" : axis.Regular(180, 0, 200, name="mll_zl0_zl1", label="mll(Z lep0, Z lep1)"), - - "w_lep0_genPartFlav" : axis.Regular(20, 0, 20, name="w_lep0_genPartFlav", label="Leading W lep genPartFlav"), - "w_lep1_genPartFlav" : axis.Regular(20, 0, 20, name="w_lep1_genPartFlav", label="Subleading W lep genPartFlav"), - "z_lep0_genPartFlav" : axis.Regular(20, 0, 20, name="z_lep0_genPartFlav", label="Leading Z lep genPartFlav"), - "z_lep1_genPartFlav" : axis.Regular(20, 0, 20, name="z_lep1_genPartFlav", label="Subleading Z lep genPartFlav"), - - "pt_zl0_zl1" : axis.Regular(180, 0, 300, name="pt_zl0_zl1", label="pt(Zl0 + Zl1)"), - "pt_wl0_wl1" : axis.Regular(180, 0, 300, name="pt_wl0_wl1", label="pt(Wl0 + Wl1)"), - "dr_zl0_zl1" : axis.Regular(180, 0, 5, name="dr_zl0_zl1", label="dr(Zl0,Zl1)"), - "dr_wl0_wl1" : axis.Regular(180, 0, 5, name="dr_wl0_wl1", label="dr(Wl0,Wl1)"), - "dr_wleps_zleps" : axis.Regular(180, 0, 5, name="dr_wleps_zleps", label="dr((Wl0+Wl1),(Zl0,Zl1))"), - "dr_wl0_j_min" : axis.Regular(180, 0, 5, name="dr_wl0_j_min", label="min dr(Wl0,j)"), - "dr_wl1_j_min" : axis.Regular(180, 0, 5, name="dr_wl1_j_min", label="min dr(Wl1,j)"), - - "mt_4l_met" : axis.Regular(180, 0, 500, name="mt_4l_met", label="mT of 4l system and met"), - "mt_wleps_met": axis.Regular(180, 0, 300, name="mt_wleps_met", label="mT of W leptons system and met"), - "mt_wl0_met" : axis.Regular(180, 0, 300, name="mt_wl0_met", label="mT of W lep0 and met"), - "mt_wl1_met" : axis.Regular(180, 0, 300, name="mt_wl1_met", label="mT of W lep1 and met"), - - "absdphi_zl0_zl1": axis.Regular(180, 0, 3.1416, name="absdphi_zl0_zl1", label="abs dphi(Zl0,Zl1)"), - "absdphi_wl0_wl1": axis.Regular(180, 0, 3.1416, name="absdphi_wl0_wl1", label="abs dphi(Wl0,Wl1)"), - "absdphi_z_ww" : axis.Regular(180, 0, 3.1416, name="absdphi_z_ww", label="abs dphi((Zl0+Zl1),(Wl0+Wl1+met))"), - "absdphi_4l_met" : axis.Regular(180, 0, 3.1416, name="absdphi_4l_met", label="abs dphi((Zl0+Zl1+Wl0+Wl1),met)"), - "absdphi_zleps_met" : axis.Regular(180, 0, 3.1416, name="absdphi_zleps_met", label="absdphi((Zl0+Zl1),met)"), - "absdphi_wleps_met" : axis.Regular(180, 0, 3.1416, name="absdphi_wleps_met", label="abs dphi((Wl0+Wl1),met)"), - "absdphi_wl0_met": axis.Regular(180, 0, 3.1416, name="absdphi_wl0_met", label="abs dphi(Wl0,met)"), - "absdphi_wl1_met": axis.Regular(180, 0, 3.1416, name="absdphi_wl1_met", label="abs dphi(Wl1,met)"), - - "absdphi_min_afas" : axis.Regular(180, 0, 3.1416, name="absdphi_min_afas", label="min(abs(delta phi of all pairs))"), - "absdphi_min_afos" : axis.Regular(180, 0, 3.1416, name="absdphi_min_afos", label="min(abs(delta phi of OS pairs))"), - "absdphi_min_sfos" : axis.Regular(180, 0, 3.1416, name="absdphi_min_sfos", label="min(abs(delta phi of SFOS pairs))"), - "mll_min_afas" : axis.Regular(180, 0, 150, name="mll_min_afas", label="min mll of all pairs"), - "mll_min_afos" : axis.Regular(180, 0, 150, name="mll_min_afos", label="min mll of OF pairs"), - "mll_min_sfos" : axis.Regular(180, 0, 150, name="mll_min_sfos", label="min mll of SFOF pairs"), - - "cos_helicity_x" : axis.Regular(180, 0, 1, name="cos_helicity_x", label="cos_helicity_x"), - - "mlb_min" : axis.Regular(180, 0, 300, name="mlb_min", label="min mass(b+l)"), - "mlb_max" : axis.Regular(180, 0, 1000, name="mlb_max", label="max mass(b+l)"), - - "njets" : axis.Regular(8, 0, 8, name="njets", label="Jet multiplicity"), - "nleps" : axis.Regular(5, 0, 5, name="nleps", label="Lep multiplicity"), - "nbtagsl" : axis.Regular(3, 0, 3, name="nbtagsl", label="Loose btag multiplicity"), - "nbtagsm" : axis.Regular(4, 0, 4, name="nbtagsm", label="Medium btag multiplicity"), - - "njets_counts" : axis.Regular(30, 0, 30, name="njets_counts", label="Jet multiplicity counts"), - "nleps_counts" : axis.Regular(30, 0, 30, name="nleps_counts", label="Lep multiplicity counts"), - "nbtagsl_counts" : axis.Regular(30, 0, 30, name="nbtagsl_counts", label="Loose btag multiplicity counts"), - - "bdt_of_wwz": axis.Regular(180, 0, 1, name="bdt_of_wwz", label="Score bdt_of_wwz"), - "bdt_sf_wwz": axis.Regular(180, 0, 1, name="bdt_sf_wwz", label="Score bdt_sf_wwz"), - "bdt_of_zh" : axis.Regular(180, 0, 1, name="bdt_of_zh", label="Score bdt_of_zh"), - "bdt_sf_zh" : axis.Regular(180, 0, 1, name="bdt_sf_zh", label="Score bdt_sf_zh"), - "bdt_of_bkg" : axis.Regular(100, 0, 1, name="bdt_of_bkg", label="Score bdt_of_bkg"), - "bdt_sf_bkg" : axis.Regular(100, 0, 1, name="bdt_sf_bkg", label="Score bdt_sf_bkg"), - "bdt_of_wwz_m_zh" : axis.Regular(180, -1, 1, name="bdt_of_wwz_m_zh", label="Score bdt_of_wwz - bdt_of_zh"), - "bdt_sf_wwz_m_zh" : axis.Regular(180, -1, 1, name="bdt_sf_wwz_m_zh", label="Score bdt_sf_wwz - bdt_sf_zh"), - "bdt_of_bin" : axis.Regular(8, 0, 8, name="bdt_of_bin", label="Binned bdt_of"), - "bdt_sf_bin" : axis.Regular(8, 0, 8, name="bdt_sf_bin", label="Binned bdt_sf"), - "bdt_of_bin_coarse" : axis.Regular(4, 0, 4, name="bdt_of_bin_coarse", label="Binned bdt_of coarse bins"), - "bdt_sf_bin_coarse" : axis.Regular(4, 0, 4, name="bdt_sf_bin_coarse", label="Binned bdt_sf coarse bins"), - +# self._dense_axes_dict = { +# "mt2" : axis.Regular(180, 0, 250, name="mt2", label="mt2"), +# "met" : axis.Regular(180, 0, 300, name="met", label="met"), +# "metphi": axis.Regular(180, -3.1416, 3.1416, name="metphi", label="met phi"), +# "ptl4" : axis.Regular(180, 0, 500, name="ptl4", label="ptl4"), +# "scalarptsum_lep" : axis.Regular(180, 0, 600, name="scalarptsum_lep", label="S_T"), +# "scalarptsum_lepmet" : axis.Regular(180, 0, 600, name="scalarptsum_lepmet", label="S_T + metpt"), +# "scalarptsum_lepmetjet" : axis.Regular(180, 0, 1100, name="scalarptsum_lepmetjet", label="S_T + metpt + H_T"), +# "scalarptsum_jet" : axis.Regular(180, 0, 500, name="scalarptsum_jet", label="H_T"), +# "mll_01": axis.Regular(180, 0, 200, name="mll_01", label="mll_l0_l1"), +# "mllll": axis.Regular(180, 0, 600, name="mllll", label="mllll"), +# "l0pt" : axis.Regular(180, 0, 500, name="l0pt", label="l0pt"), +# "j0pt" : axis.Regular(180, 0, 500, name="j0pt", label="j0pt"), +# +# "abs_pdgid_sum" : axis.Regular(15, 40, 55, name="abs_pdgid_sum", label="Sum of abs pdgId for all 4 lep"), +# +# "w_lep0_pt" : axis.Regular(180, 0, 300, name="w_lep0_pt", label="Leading W lep pt"), +# "w_lep1_pt" : axis.Regular(180, 0, 300, name="w_lep1_pt", label="Subleading W lep pt"), +# "z_lep0_pt" : axis.Regular(180, 0, 300, name="z_lep0_pt", label="Leading Z lep pt"), +# "z_lep1_pt" : axis.Regular(180, 0, 300, name="z_lep1_pt", label="Subleading Z lep pt"), +# "w_lep0_eta" : axis.Regular(180, -3, 3, name="w_lep0_eta", label="Leading W lep eta"), +# "w_lep1_eta" : axis.Regular(180, -3, 3, name="w_lep1_eta", label="Subleading W lep eta"), +# "z_lep0_eta" : axis.Regular(180, -3, 3, name="z_lep0_eta", label="Leading Z lep eta"), +# "z_lep1_eta" : axis.Regular(180, -3, 3, name="z_lep1_eta", label="Subleading Z lep eta"), +# "w_lep0_phi" : axis.Regular(180, -3.1416, 3.1416, name="w_lep0_phi", label="Leading W lep phi"), +# "w_lep1_phi" : axis.Regular(180, -3.1416, 3.1416, name="w_lep1_phi", label="Subleading W lep phi"), +# "z_lep0_phi" : axis.Regular(180, -3.1416, 3.1416, name="z_lep0_phi", label="Leading Z lep phi"), +# "z_lep1_phi" : axis.Regular(180, -3.1416, 3.1416, name="z_lep1_phi", label="Subleading Z lep phi"), +# "mll_wl0_wl1" : axis.Regular(180, 0, 350, name="mll_wl0_wl1", label="mll(W lep0, W lep1)"), +# "mll_zl0_zl1" : axis.Regular(180, 0, 200, name="mll_zl0_zl1", label="mll(Z lep0, Z lep1)"), +# +# "w_lep0_genPartFlav" : axis.Regular(20, 0, 20, name="w_lep0_genPartFlav", label="Leading W lep genPartFlav"), +# "w_lep1_genPartFlav" : axis.Regular(20, 0, 20, name="w_lep1_genPartFlav", label="Subleading W lep genPartFlav"), +# "z_lep0_genPartFlav" : axis.Regular(20, 0, 20, name="z_lep0_genPartFlav", label="Leading Z lep genPartFlav"), +# "z_lep1_genPartFlav" : axis.Regular(20, 0, 20, name="z_lep1_genPartFlav", label="Subleading Z lep genPartFlav"), +# +# "pt_zl0_zl1" : axis.Regular(180, 0, 300, name="pt_zl0_zl1", label="pt(Zl0 + Zl1)"), +# "pt_wl0_wl1" : axis.Regular(180, 0, 300, name="pt_wl0_wl1", label="pt(Wl0 + Wl1)"), +# "dr_zl0_zl1" : axis.Regular(180, 0, 5, name="dr_zl0_zl1", label="dr(Zl0,Zl1)"), +# "dr_wl0_wl1" : axis.Regular(180, 0, 5, name="dr_wl0_wl1", label="dr(Wl0,Wl1)"), +# "dr_wleps_zleps" : axis.Regular(180, 0, 5, name="dr_wleps_zleps", label="dr((Wl0+Wl1),(Zl0,Zl1))"), +# "dr_wl0_j_min" : axis.Regular(180, 0, 5, name="dr_wl0_j_min", label="min dr(Wl0,j)"), +# "dr_wl1_j_min" : axis.Regular(180, 0, 5, name="dr_wl1_j_min", label="min dr(Wl1,j)"), +# +# "mt_4l_met" : axis.Regular(180, 0, 500, name="mt_4l_met", label="mT of 4l system and met"), +# "mt_wleps_met": axis.Regular(180, 0, 300, name="mt_wleps_met", label="mT of W leptons system and met"), +# "mt_wl0_met" : axis.Regular(180, 0, 300, name="mt_wl0_met", label="mT of W lep0 and met"), +# "mt_wl1_met" : axis.Regular(180, 0, 300, name="mt_wl1_met", label="mT of W lep1 and met"), +# +# "absdphi_zl0_zl1": axis.Regular(180, 0, 3.1416, name="absdphi_zl0_zl1", label="abs dphi(Zl0,Zl1)"), +# "absdphi_wl0_wl1": axis.Regular(180, 0, 3.1416, name="absdphi_wl0_wl1", label="abs dphi(Wl0,Wl1)"), +# "absdphi_z_ww" : axis.Regular(180, 0, 3.1416, name="absdphi_z_ww", label="abs dphi((Zl0+Zl1),(Wl0+Wl1+met))"), +# "absdphi_4l_met" : axis.Regular(180, 0, 3.1416, name="absdphi_4l_met", label="abs dphi((Zl0+Zl1+Wl0+Wl1),met)"), +# "absdphi_zleps_met" : axis.Regular(180, 0, 3.1416, name="absdphi_zleps_met", label="absdphi((Zl0+Zl1),met)"), +# "absdphi_wleps_met" : axis.Regular(180, 0, 3.1416, name="absdphi_wleps_met", label="abs dphi((Wl0+Wl1),met)"), +# "absdphi_wl0_met": axis.Regular(180, 0, 3.1416, name="absdphi_wl0_met", label="abs dphi(Wl0,met)"), +# "absdphi_wl1_met": axis.Regular(180, 0, 3.1416, name="absdphi_wl1_met", label="abs dphi(Wl1,met)"), +# +# "absdphi_min_afas" : axis.Regular(180, 0, 3.1416, name="absdphi_min_afas", label="min(abs(delta phi of all pairs))"), +# "absdphi_min_afos" : axis.Regular(180, 0, 3.1416, name="absdphi_min_afos", label="min(abs(delta phi of OS pairs))"), +# "absdphi_min_sfos" : axis.Regular(180, 0, 3.1416, name="absdphi_min_sfos", label="min(abs(delta phi of SFOS pairs))"), +# "mll_min_afas" : axis.Regular(180, 0, 150, name="mll_min_afas", label="min mll of all pairs"), +# "mll_min_afos" : axis.Regular(180, 0, 150, name="mll_min_afos", label="min mll of OF pairs"), +# "mll_min_sfos" : axis.Regular(180, 0, 150, name="mll_min_sfos", label="min mll of SFOF pairs"), +# +# "cos_helicity_x" : axis.Regular(180, 0, 1, name="cos_helicity_x", label="cos_helicity_x"), +# +# "mlb_min" : axis.Regular(180, 0, 300, name="mlb_min", label="min mass(b+l)"), +# "mlb_max" : axis.Regular(180, 0, 1000, name="mlb_max", label="max mass(b+l)"), +# +# "njets" : axis.Regular(8, 0, 8, name="njets", label="Jet multiplicity"), +# "nleps" : axis.Regular(5, 0, 5, name="nleps", label="Lep multiplicity"), +# "nbtagsl" : axis.Regular(3, 0, 3, name="nbtagsl", label="Loose btag multiplicity"), +# "nbtagsm" : axis.Regular(4, 0, 4, name="nbtagsm", label="Medium btag multiplicity"), +# +# "njets_counts" : axis.Regular(30, 0, 30, name="njets_counts", label="Jet multiplicity counts"), +# "nleps_counts" : axis.Regular(30, 0, 30, name="nleps_counts", label="Lep multiplicity counts"), +# "nbtagsl_counts" : axis.Regular(30, 0, 30, name="nbtagsl_counts", label="Loose btag multiplicity counts"), +# +# "bdt_of_wwz": axis.Regular(180, 0, 1, name="bdt_of_wwz", label="Score bdt_of_wwz"), +# "bdt_sf_wwz": axis.Regular(180, 0, 1, name="bdt_sf_wwz", label="Score bdt_sf_wwz"), +# "bdt_of_zh" : axis.Regular(180, 0, 1, name="bdt_of_zh", label="Score bdt_of_zh"), +# "bdt_sf_zh" : axis.Regular(180, 0, 1, name="bdt_sf_zh", label="Score bdt_sf_zh"), +# "bdt_of_bkg" : axis.Regular(100, 0, 1, name="bdt_of_bkg", label="Score bdt_of_bkg"), +# "bdt_sf_bkg" : axis.Regular(100, 0, 1, name="bdt_sf_bkg", label="Score bdt_sf_bkg"), +# "bdt_of_wwz_m_zh" : axis.Regular(180, -1, 1, name="bdt_of_wwz_m_zh", label="Score bdt_of_wwz - bdt_of_zh"), +# "bdt_sf_wwz_m_zh" : axis.Regular(180, -1, 1, name="bdt_sf_wwz_m_zh", label="Score bdt_sf_wwz - bdt_sf_zh"), +# "bdt_of_bin" : axis.Regular(8, 0, 8, name="bdt_of_bin", label="Binned bdt_of"), +# "bdt_sf_bin" : axis.Regular(8, 0, 8, name="bdt_sf_bin", label="Binned bdt_sf"), +# "bdt_of_bin_coarse" : axis.Regular(4, 0, 4, name="bdt_of_bin_coarse", label="Binned bdt_of coarse bins"), +# "bdt_sf_bin_coarse" : axis.Regular(4, 0, 4, name="bdt_sf_bin_coarse", label="Binned bdt_sf coarse bins"), +# +# } + + + + self._histo_dict = { + "etaphi_all": hist.Hist( + hist.axis.StrCategory([], growth=True, name="category", label="category"), + axis.Regular(40, -2.4, 2.4, name="eta", label="eta"), + axis.Regular(40, -3.1416, 3.1416, name="phi", label="phi"), + storage="double", # Keeps track of sumw2 + name="Counts", + ) } + # Add histograms to dictionary that will be passed on to dict_accumulator dout = {} - for dense_axis_name in self._dense_axes_dict.keys(): - dout[dense_axis_name] = hist.Hist( - hist.axis.StrCategory([], growth=True, name="process", label="process"), - hist.axis.StrCategory([], growth=True, name="category", label="category"), - hist.axis.StrCategory([], growth=True, name="systematic", label="systematic"), - self._dense_axes_dict[dense_axis_name], - storage="weight", # Keeps track of sumw2 - name="Counts", - ) +# for dense_axis_name in self._dense_axes_dict.keys(): +# dout[dense_axis_name] = hist.Hist( +# hist.axis.StrCategory([], growth=True, name="process", label="process"), +# hist.axis.StrCategory([], growth=True, name="category", label="category"), +# hist.axis.StrCategory([], growth=True, name="systematic", label="systematic"), +# self._dense_axes_dict[dense_axis_name], +# storage="weight", # Keeps track of sumw2 +# name="Counts", +# ) # Adding list accumulators for BDT output variables and weights if siphon_bdt_data: @@ -448,12 +461,40 @@ def process(self, events): ######### The rest of the processor is inside this loop over systs that affect object kinematics ########### + do_full_jec_list = False # toggle switch for 27 uncertainties + do_reduced_jec_list = True # toggle switch for 11 uncertainties + + if do_full_jec_list and do_reduced_jec_list: + raise Exception("Cannot do both the full and reduced JEC uncertainties") + + if do_full_jec_list: + obj_correction_systs = [ + "AbsoluteMPFBias_correlated","AbsoluteScale_correlated","FlavorQCD_correlated","Fragmentation_correlated","PileUpDataMC_correlated", + "PileUpPtBB_correlated","PileUpPtEC1_correlated","PileUpPtEC2_correlated","PileUpPtHF_correlated","PileUpPtRef_correlated", + "RelativeFSR_correlated","RelativeJERHF_correlated","RelativePtBB_correlated","RelativePtHF_correlated","RelativeBal_correlated", + "SinglePionECAL_correlated","SinglePionHCAL_correlated", + f"AbsoluteStat_uncorrelated_{year}",f"RelativeJEREC1_uncorrelated_{year}",f"RelativeJEREC2_uncorrelated_{year}",f"RelativePtEC1_uncorrelated_{year}",f"RelativePtEC2_uncorrelated_{year}", + f"TimePtEta_uncorrelated_{year}",f"RelativeSample_uncorrelated_{year}",f"RelativeStatEC_uncorrelated_{year}",f"RelativeStatFSR_uncorrelated_{year}",f"RelativeStatHF_uncorrelated_{year}", + f"CMS_res_j_{year}", + f"CMS_scale_met_unclustered_energy_{year}", + ] + + elif do_reduced_jec_list: + obj_correction_systs = [ + "Regrouped_FlavorQCD_correlated","Regrouped_RelativeBal_correlated","Regrouped_HF_correlated","Regrouped_BBEC1_correlated","Regrouped_EC2_correlated", + "Regrouped_Absolute_correlated", + f"Regrouped_Absolute_{year}_uncorrelated",f"Regrouped_HF_{year}_uncorrelated",f"Regrouped_EC2_{year}_uncorrelated",f"Regrouped_RelativeSample_{year}_uncorrelated",f"Regrouped_BBEC1_{year}_uncorrelated", + f"CMS_res_j_{year}", + f"CMS_scale_met_unclustered_energy_{year}", + ] + + else: + obj_correction_systs = [ + f"CMS_scale_j_{year}", + f"CMS_res_j_{year}", + f"CMS_scale_met_unclustered_energy_{year}", + ] - obj_correction_systs = [ - f"CMS_scale_j_{year}", - f"CMS_res_j_{year}", - f"CMS_scale_met_unclustered_energy_{year}", - ] obj_correction_systs = append_up_down_to_sys_base(obj_correction_systs) # If we're doing systematics and this isn't data, we will loop over the obj correction syst lst list @@ -1185,81 +1226,109 @@ def process(self, events): } - # Set up the list of weight fluctuations to loop over - # For now the syst do not depend on the category, so we can figure this out outside of the filling loop - wgt_var_lst = ["nominal"] - if self._do_systematics: - if not isData: - if (obj_corr_syst_var != "nominal"): - # In this case, we are dealing with systs that change the kinematics of the objs (e.g. JES) - # So we don't want to loop over up/down weight variations here - wgt_var_lst = [obj_corr_syst_var] - else: - # Otherwise we want to loop over the up/down weight variations - wgt_var_lst = wgt_var_lst + wgt_correction_syst_lst - - - - # Loop over the hists we want to fill - for dense_axis_name, dense_axis_vals in dense_variables_dict.items(): - if dense_axis_name not in self._hist_lst: - #print(f"Skipping \"{dense_axis_name}\", it is not in the list of hists to include.") - continue - #print("\ndense_axis_name,vals",dense_axis_name) - #print("\ndense_axis_name,vals",vals) - - # Loop over weight fluctuations - for wgt_fluct in wgt_var_lst: - - # Get the appropriate weight fluctuation - if (wgt_fluct == "nominal") or (wgt_fluct in obj_corr_syst_var_list): - # In the case of "nominal", no weight systematic variation is used - weight = weights_obj_base_for_kinematic_syst.weight(None) - else: - # Otherwise get the weight from the Weights object - weight = weights_obj_base_for_kinematic_syst.weight(wgt_fluct) - - - # Loop over categories - for sr_cat in cat_dict["lep_chan_lst"]: - - # Skip filling if this variable is not relevant for this selection - if (dense_axis_name in exclude_var_dict) and (sr_cat in exclude_var_dict[dense_axis_name]): continue - - # If this is a counts hist, forget the weights and just fill with unit weights - if dense_axis_name.endswith("_counts"): weight = events.nom - #else: weights = weights_obj_base_for_kinematic_syst.partial_weight(include=["norm"]) # For testing - #else: weights = weights_obj_base_for_kinematic_syst.weight(None) # For testing - - # Make the cuts mask - cuts_lst = [sr_cat] - if isData: cuts_lst.append("is_good_lumi") # Apply golden json requirements if this is data - all_cuts_mask = selections.all(*cuts_lst) - - #run = events.run[all_cuts_mask] - #luminosityBlock = events.luminosityBlock[all_cuts_mask] - #event = events.event[all_cuts_mask] - #w = weights[all_cuts_mask] - #if dense_axis_name == "njets": - # print("\nSTARTPRINT") - # for i,j in enumerate(w): - # out_str = f"PRINTTAG {i} {dense_axis_name} {year} {sr_cat} {event[i]} {run[i]} {luminosityBlock[i]} {w[i]}" - # print(out_str,file=sys.stderr,flush=True) - # print("ENDPRINT\n") - #print("\ndense_axis_name",dense_axis_name) - #print("sr_cat",sr_cat) - #print("dense_axis_vals[all_cuts_mask]",dense_axis_vals[all_cuts_mask]) - #print("end") - - # Fill the histos - axes_fill_info_dict = { - dense_axis_name : dense_axis_vals[all_cuts_mask], - "weight" : weight[all_cuts_mask], - "process" : histAxisName, - "category" : sr_cat, - "systematic" : wgt_fluct, - } - self.accumulator[dense_axis_name].fill(**axes_fill_info_dict) +# # Set up the list of weight fluctuations to loop over +# # For now the syst do not depend on the category, so we can figure this out outside of the filling loop +# wgt_var_lst = ["nominal"] +# if self._do_systematics: +# if not isData: +# if (obj_corr_syst_var != "nominal"): +# # In this case, we are dealing with systs that change the kinematics of the objs (e.g. JES) +# # So we don't want to loop over up/down weight variations here +# wgt_var_lst = [obj_corr_syst_var] +# else: +# # Otherwise we want to loop over the up/down weight variations +# wgt_var_lst = wgt_var_lst + wgt_correction_syst_lst + + #for sr_cat in ["cr_4l_sf","cr_4l_btag_of","cr_4l_btag_sf_offZ_met80"]: + if isData: + sr_cat = "all_events" + cuts_lst = [sr_cat] + cuts_lst.append("is_good_lumi") # Apply golden json requirements if this is data + all_cuts_mask = selections.all(*cuts_lst) + + hout = self._histo_dict + dense_axis_name = "etaphi_all" + + jets_sel = ak.flatten(goodJets[all_cuts_mask]) + eta_sel = jets_sel.eta + phi_sel = jets_sel.phi + weights = ak.ones_like(jets_sel.pt) + + + # Fill the histo + + axes_fill_info_dict = { + "category" : sr_cat, + "weight" : weights, + "eta" : eta_sel, + "phi" : phi_sel, + } + hout[dense_axis_name].fill(**axes_fill_info_dict) + + return hout + + + +# # Loop over the hists we want to fill +# for dense_axis_name, dense_axis_vals in dense_variables_dict.items(): +# if dense_axis_name not in self._hist_lst: +# #print(f"Skipping \"{dense_axis_name}\", it is not in the list of hists to include.") +# continue +# #print("\ndense_axis_name,vals",dense_axis_name) +# #print("\ndense_axis_name,vals",vals) +# +# # Loop over weight fluctuations +# for wgt_fluct in wgt_var_lst: +# +# # Get the appropriate weight fluctuation +# if (wgt_fluct == "nominal") or (wgt_fluct in obj_corr_syst_var_list): +# # In the case of "nominal", no weight systematic variation is used +# weight = weights_obj_base_for_kinematic_syst.weight(None) +# else: +# # Otherwise get the weight from the Weights object +# weight = weights_obj_base_for_kinematic_syst.weight(wgt_fluct) +# +# +# # Loop over categories +# for sr_cat in cat_dict["lep_chan_lst"]: +# +# # Skip filling if this variable is not relevant for this selection +# if (dense_axis_name in exclude_var_dict) and (sr_cat in exclude_var_dict[dense_axis_name]): continue +# +# # If this is a counts hist, forget the weights and just fill with unit weights +# if dense_axis_name.endswith("_counts"): weight = events.nom +# #else: weights = weights_obj_base_for_kinematic_syst.partial_weight(include=["norm"]) # For testing +# #else: weights = weights_obj_base_for_kinematic_syst.weight(None) # For testing +# +# # Make the cuts mask +# cuts_lst = [sr_cat] +# if isData: cuts_lst.append("is_good_lumi") # Apply golden json requirements if this is data +# all_cuts_mask = selections.all(*cuts_lst) +# +# #run = events.run[all_cuts_mask] +# #luminosityBlock = events.luminosityBlock[all_cuts_mask] +# #event = events.event[all_cuts_mask] +# #w = weights[all_cuts_mask] +# #if dense_axis_name == "njets": +# # print("\nSTARTPRINT") +# # for i,j in enumerate(w): +# # out_str = f"PRINTTAG {i} {dense_axis_name} {year} {sr_cat} {event[i]} {run[i]} {luminosityBlock[i]} {w[i]}" +# # print(out_str,file=sys.stderr,flush=True) +# # print("ENDPRINT\n") +# #print("\ndense_axis_name",dense_axis_name) +# #print("sr_cat",sr_cat) +# #print("dense_axis_vals[all_cuts_mask]",dense_axis_vals[all_cuts_mask]) +# #print("end") +# +# # Fill the histos +# axes_fill_info_dict = { +# dense_axis_name : dense_axis_vals[all_cuts_mask], +# "weight" : weight[all_cuts_mask], +# "process" : histAxisName, +# "category" : sr_cat, +# "systematic" : wgt_fluct, +# } +# self.accumulator[dense_axis_name].fill(**axes_fill_info_dict) # Fill the list accumulator if self._siphon_bdt_data: diff --git a/ewkcoffea/modules/corrections.py b/ewkcoffea/modules/corrections.py index 134adf69..acd924f5 100644 --- a/ewkcoffea/modules/corrections.py +++ b/ewkcoffea/modules/corrections.py @@ -2,6 +2,7 @@ import pickle import gzip import awkward as ak +import re from collections import OrderedDict @@ -534,18 +535,25 @@ def ApplyJetCorrections(year,isData, era): if year in ['2022','2022EE','2023','2023BPix']: jec_year = year[:4] + '_Summer' + year[2:] + if year in ["2016","2016APV"]: + reg_year = "2016" + else: + reg_year = year json_path = topcoffea_path(f"data/POG/JME/{jec_year}/jet_jerc.json.gz") #json_path = topcoffea_path(f"data/POG/JME/{jec_year}/fatjet_jerc.json.gz") jec_types_clib = [ - "AbsoluteMPFBias","AbsoluteScale","FlavorQCD","Fragmentation","PileUpDataMC", - "PileUpPtBB","PileUpPtEC1","PileUpPtEC2","PileUpPtHF","PileUpPtRef", - "RelativeFSR","RelativeJERHF","RelativePtBB","RelativePtHF","RelativeBal", - "SinglePionECAL","SinglePionHCAL", - "AbsoluteStat","RelativeJEREC1","RelativeJEREC2","RelativePtEC1","RelativePtEC2", - "TimePtEta","RelativeSample","RelativeStatEC","RelativeStatFSR","RelativeStatHF", - "Total", + #"AbsoluteMPFBias","AbsoluteScale","FlavorQCD","Fragmentation","PileUpDataMC", + #"PileUpPtBB","PileUpPtEC1","PileUpPtEC2","PileUpPtHF","PileUpPtRef", + #"RelativeFSR","RelativeJERHF","RelativePtBB","RelativePtHF","RelativeBal", + #"SinglePionECAL","SinglePionHCAL", + #"AbsoluteStat","RelativeJEREC1","RelativeJEREC2","RelativePtEC1","RelativePtEC2", + #"TimePtEta","RelativeSample","RelativeStatEC","RelativeStatFSR","RelativeStatHF", + "Regrouped_FlavorQCD","Regrouped_RelativeBal","Regrouped_HF","Regrouped_BBEC1","Regrouped_EC2", + "Regrouped_Absolute", + f"Regrouped_Absolute_{reg_year}",f"Regrouped_HF_{reg_year}",f"Regrouped_EC2_{reg_year}",f"Regrouped_RelativeSample_{reg_year}",f"Regrouped_BBEC1_{reg_year}", + #"Total", ] jec_regroup_clib = [f"Quad_{jec_tag}_UncertaintySources_{jec_type}_{jet_algo}" for jec_type in jec_types_clib] jec_names_clib = [ @@ -593,7 +601,20 @@ def ApplyJetSystematics(year,cleanedJets,syst_var): elif (syst_var == f'CMS_scale_j_{year}Down'): return cleanedJets.JES_Total.down else: - raise Exception(f"Unsupported systematic variation: {syst_var}") + try: + if syst_var.startswith("Regrouped"): + syst = "JES_" + re.sub(r'_[^_]*$', '', syst_var) + if syst.endswith("APV"): + syst = syst[:-3] + else: + syst = "JES_" + syst_var.split('_')[0] + attribute = getattr(cleanedJets,syst) + if syst_var.endswith('Up'): + return attribute.up + elif syst_var.endswith('Down'): + return attribute.down + except AttributeError: + raise ValueError(f"Unsupported systematic variation: {syst} and {syst_var}") def ApplyJetVetoMaps(jets,year): diff --git a/ewkcoffea/params/rate_systs.json b/ewkcoffea/params/rate_systs.json index 091f9bc8..cfb9f61f 100644 --- a/ewkcoffea/params/rate_systs.json +++ b/ewkcoffea/params/rate_systs.json @@ -2,6 +2,10 @@ "rate_uncertainties_all_proc" : { "lumi": { "run2" : 1.016, + "UL18" : 1.016, + "UL17" : 1.016, + "UL16" : 1.016, + "UL16APV" : 1.016, "run3" : 1.014, "y22" : 1.014, "y23" : 1.014 @@ -17,6 +21,10 @@ "val" : "1.3", "decorrelation_dict" : { "run2" : "13TeV", + "UL18" : "13TeV", + "UL17" : "13TeV", + "UL16" : "13TeV", + "UL16APV" : "13TeV", "run3" : "13p6TeV", "y22" : "13p6TeV", "y23" : "13p6TeV"