Merge pull request #17 from cmstas/datacards
Make datacards with systematics, new tWZ, and other updates
kmohrman authored Mar 2, 2024
2 parents d72f54a + 418b1fd commit 4ad6ef9
Showing 14 changed files with 1,018 additions and 366 deletions.
430 changes: 170 additions & 260 deletions analysis/wwz/get_wwz_yields.py

Large diffs are not rendered by default.

400 changes: 347 additions & 53 deletions analysis/wwz/make_datacards.py

Large diffs are not rendered by default.

38 changes: 37 additions & 1 deletion analysis/wwz/make_sample_lst_json.py
@@ -90,6 +90,38 @@
############################ Bkg samples ############################


# Get some from TOP-22-006 skims #
central_UL16APV_22006_dict = {
"UL16APV_tWZ4l" : {
"path" : "/store/user/rucio/kmohrman/skims/mc/new-lepMVA-v2/central_bkgd_p7/TWZToLL/v1/UL16APV_TWZToLL_tlept_Wlept/",
"histAxisName": "UL16APV_tWZ4l",
"xsecName": "TWZToLL_tlept_Wlept",
}
}
central_UL16_22006_dict = {
"UL16_tWZ4l" : {
"path" : "/store/user/rucio/kmohrman/skims/mc/new-lepMVA-v2/central_bkgd_p7/TWZToLL/v1/UL16_TWZToLL_tlept_Wlept/",
"histAxisName": "UL16_tWZ4l",
"xsecName": "TWZToLL_tlept_Wlept",
}
}
central_UL17_22006_dict = {
"UL17_tWZ4l" : {
"path" : "/store/user/rucio/kmohrman/skims/mc/new-lepMVA-v2/central_bkgd_p7/TWZToLL/v1/UL17_TWZToLL_tlept_Wlept/",
"histAxisName": "UL17_tWZ4l",
"xsecName": "TWZToLL_tlept_Wlept",
}
}
central_UL18_22006_dict = {
"UL18_tWZ4l" : {
"path" : "/store/user/rucio/kmohrman/skims/mc/new-lepMVA-v2/central_bkgd_p7/TWZToLL/v1/UL18_TWZToLL_tlept_Wlept/",
"histAxisName": "UL18_tWZ4l",
"xsecName": "TWZToLL_tlept_Wlept",
}
}


# All the rest of the backgrounds (located at ucsd)
central_UL16APV_bkg_dict = {

"UL16APV_ZZTo4L" : {
@@ -386,7 +418,7 @@ def main():

# A simple example
#make_jsons_for_dict_of_samples(test_wwz_dict, "/ceph/cms/","2017",".",on_das=False) # An example
make_jsons_for_dict_of_samples(ci_dict, "","2017","../../input_samples/sample_jsons/test_samples/",on_das=False) # For CI json
#make_jsons_for_dict_of_samples(ci_dict, "","2017","../../input_samples/sample_jsons/test_samples/",on_das=False) # For CI json

# Specify output paths
jsons_path = "../../input_samples/sample_jsons/"
@@ -404,6 +436,10 @@
#make_jsons_for_dict_of_samples(data_UL18, "/ceph/cms/","2018", out_dir_data_18,on_das=False)

# Make configs for bkg samples
#make_jsons_for_dict_of_samples(central_UL16APV_22006_dict, "/cmsuf/data/","2016APV", out_dir_bkg,on_das=False)
#make_jsons_for_dict_of_samples(central_UL16_22006_dict, "/cmsuf/data/","2016", out_dir_bkg,on_das=False)
#make_jsons_for_dict_of_samples(central_UL17_22006_dict, "/cmsuf/data/","2017", out_dir_bkg,on_das=False)
#make_jsons_for_dict_of_samples(central_UL18_22006_dict, "/cmsuf/data/","2018", out_dir_bkg,on_das=False)
make_jsons_for_dict_of_samples(central_UL16APV_bkg_dict, "/ceph/cms/","2016APV", out_dir_bkg,on_das=False)
make_jsons_for_dict_of_samples(central_UL16_bkg_dict, "/ceph/cms/","2016", out_dir_bkg,on_das=False)
make_jsons_for_dict_of_samples(central_UL17_bkg_dict, "/ceph/cms/","2017", out_dir_bkg,on_das=False)
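The four new per-era skim dicts above share an identical structure, so (for illustration only, not part of this diff) the four commented-out calls for them could be collapsed into one loop. This sketch assumes the dicts, out_dir_bkg, and make_jsons_for_dict_of_samples are in scope as defined in this file.

# Illustrative compact form of the four TOP-22-006 tWZ skim calls (hypothetical, not in the commit)
skim_dicts_22006 = {
    "2016APV": central_UL16APV_22006_dict,
    "2016": central_UL16_22006_dict,
    "2017": central_UL17_22006_dict,
    "2018": central_UL18_22006_dict,
}
for year, sample_dict in skim_dicts_22006.items():
    make_jsons_for_dict_of_samples(sample_dict, "/cmsuf/data/", year, out_dir_bkg, on_das=False)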
58 changes: 21 additions & 37 deletions analysis/wwz/ref_for_ci/counts_wwz_ref.json
@@ -40,14 +40,6 @@
1861.0,
null
],
"sr_4l_sf_presel_counts": [
1836.0,
null
],
"sr_4l_of_presel_counts": [
3433.0,
null
],
"all_events_counts": [
54868.0,
null
@@ -56,26 +48,26 @@
9865.0,
null
],
"cr_4l_btag_of_counts": [
919.0,
null
],
"cr_4l_sf_counts": [
393.0,
"sr_4l_sf_incl_counts": [
1836.0,
null
],
"cr_4l_btag_sf_counts": [
955.0,
"sr_4l_of_incl_counts": [
3433.0,
null
],
"cr_4l_btag_sf_offZ_counts": [
847.0,
"cr_4l_btag_of_counts": [
919.0,
null
],
"cr_4l_btag_sf_offZ_met80_counts": [
447.0,
null
],
"cr_4l_sf_counts": [
393.0,
null
],
"sr_4l_sf_A": [
0.5511977683719258,
null
@@ -104,14 +96,6 @@
1.2574913943885722,
null
],
"sr_4l_sf_presel": [
1.193888668142479,
null
],
"sr_4l_of_presel": [
2.2967307064423976,
null
],
"all_events": [
44.554542092725235,
null
@@ -120,25 +104,25 @@
7.062864594060521,
null
],
"cr_4l_btag_of": [
0.8712045054150901,
null
],
"cr_4l_sf": [
0.2633144810331567,
"sr_4l_sf_incl": [
1.193888668142479,
null
],
"cr_4l_btag_sf": [
0.8585255699670182,
"sr_4l_of_incl": [
2.2967307064423976,
null
],
"cr_4l_btag_sf_offZ": [
0.7511458992925877,
"cr_4l_btag_of": [
0.8712045054150901,
null
],
"cr_4l_btag_sf_offZ_met80": [
0.39497905244658593,
null
],
"cr_4l_sf": [
0.2633144810331567,
null
]
}
}
}
24 changes: 14 additions & 10 deletions analysis/wwz/wwz4l.py
@@ -574,6 +574,7 @@ def process(self, events):
zeroj = (njets==0)

# For WWZ selection

selections.add("sr_4l_sf_A", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_sf & w_candidates_mll_far_from_z & sf_A))
selections.add("sr_4l_sf_B", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_sf & w_candidates_mll_far_from_z & sf_B))
selections.add("sr_4l_sf_C", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_sf & w_candidates_mll_far_from_z & sf_C))
@@ -585,10 +586,15 @@
selections.add("all_events", (events.is4lWWZ | (~events.is4lWWZ))) # All events.. this logic is a bit roundabout to just get an array of True
selections.add("4l_presel", (events.is4lWWZ)) # This matches the VVV looper selection (object selection and event selection)

selections.add("sr_4l_sf_presel", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_sf & w_candidates_mll_far_from_z & (met.pt > 65.0)))
selections.add("sr_4l_of_presel", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_of))
selections.add("sr_4l_sf_incl", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_sf & w_candidates_mll_far_from_z & (met.pt >= 65.0))) # Inclusive over SF sr (only applying cuts that are applied to all SF SRs), just use for visualization
selections.add("sr_4l_of_incl", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_of)) # Inclusive over OF sr (only applying cuts that are applied to all OF SRs), just use for visualization

# For BDT SRs

selections.add("sr_4l_bdt_sf_presel", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_sf & w_candidates_mll_far_from_z))
selections.add("sr_4l_bdt_sf_trn" , (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_sf & w_candidates_mll_far_from_z & mt2_mask))
selections.add("sr_4l_bdt_of_presel", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_of))

selections.add("sr_4l_bdt_sf_wwz_sr1", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_of & sf_wwz_sr1))
selections.add("sr_4l_bdt_sf_wwz_sr2", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_of & sf_wwz_sr2))
selections.add("sr_4l_bdt_sf_wwz_sr3", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_of & sf_wwz_sr3))
Expand All @@ -612,8 +618,6 @@ def process(self, events):
ww_em = ((abs(w_lep0.pdgId) == 11) & (abs(w_lep1.pdgId) == 13))
ww_me = ((abs(w_lep0.pdgId) == 13) & (abs(w_lep1.pdgId) == 11))
selections.add("cr_4l_btag_of", (pass_trg & events.is4lWWZ & bmask_atleast1loose & events.wwz_presel_of))
selections.add("cr_4l_btag_sf", (pass_trg & events.is4lWWZ & bmask_atleast1loose & events.wwz_presel_sf))
selections.add("cr_4l_btag_sf_offZ", (pass_trg & events.is4lWWZ & bmask_atleast1loose & events.wwz_presel_sf & w_candidates_mll_far_from_z))
selections.add("cr_4l_btag_sf_offZ_met80", (pass_trg & events.is4lWWZ & bmask_atleast1loose & events.wwz_presel_sf & w_candidates_mll_far_from_z & (met.pt > 80.0)))
selections.add("cr_4l_sf", (pass_trg & events.is4lWWZ & bmask_exactly0loose & events.wwz_presel_sf & (~w_candidates_mll_far_from_z)))

@@ -639,9 +643,9 @@
cat_dict = {
"lep_chan_lst" : [
"sr_4l_sf_A","sr_4l_sf_B","sr_4l_sf_C","sr_4l_of_1","sr_4l_of_2","sr_4l_of_3","sr_4l_of_4",
"sr_4l_sf_presel", "sr_4l_of_presel",
"all_events","4l_presel",
"cr_4l_btag_of","cr_4l_sf", "cr_4l_btag_sf", "cr_4l_btag_sf_offZ", "cr_4l_btag_sf_offZ_met80",
"all_events","4l_presel", "sr_4l_sf_incl", "sr_4l_of_incl",
"cr_4l_btag_of", "cr_4l_btag_sf_offZ_met80", "cr_4l_sf",
"sr_4l_bdt_sf_presel", "sr_4l_bdt_sf_trn", "sr_4l_bdt_of_presel",
] + bdt_sr_names
}

@@ -723,7 +727,7 @@ def process(self, events):
exclude_var_dict = {
"mt2" : ["all_events"],
"ptl4" : ["all_events"],
"j0pt" : ["all_events", "4l_presel", "sr_4l_sf_presel", "sr_4l_of_presel", "cr_4l_sf"] + analysis_cats,
"j0pt" : ["all_events", "4l_presel", "sr_4l_sf_incl", "sr_4l_of_incl", "sr_4l_bdt_sf_presel", "sr_4l_bdt_sf_trn", "sr_4l_bdt_of_presel", "cr_4l_sf"] + analysis_cats,
"l0pt" : ["all_events"],
"mll_01" : ["all_events"],
"mllll" : ["all_events"],
@@ -764,8 +768,8 @@ def process(self, events):
"mll_min_afos" : ["all_events"],
"mll_min_sfos" : ["all_events"],

"mlb_min" : ["all_events","4l_presel", "sr_4l_sf_presel", "sr_4l_of_presel", "cr_4l_sf"] + analysis_cats,
"mlb_max" : ["all_events","4l_presel", "sr_4l_sf_presel", "sr_4l_of_presel", "cr_4l_sf"] + analysis_cats,
"mlb_min" : ["all_events","4l_presel", "sr_4l_sf_incl", "sr_4l_of_incl", "sr_4l_bdt_sf_presel", "sr_4l_bdt_sf_trn", "sr_4l_bdt_of_presel", "cr_4l_sf"] + analysis_cats,
"mlb_max" : ["all_events","4l_presel", "sr_4l_sf_incl", "sr_4l_of_incl", "sr_4l_bdt_sf_presel", "sr_4l_bdt_sf_trn", "sr_4l_bdt_of_presel", "cr_4l_sf"] + analysis_cats,

"bdt_of_wwz_raw": ["all_events"],
"bdt_sf_wwz_raw": ["all_events"],
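The selections.add calls in this processor look like coffea's PackedSelection API (an assumption; the constructor is outside the shown hunks). A minimal, self-contained sketch of that pattern, with made-up masks, is:

import numpy as np
from coffea.analysis_tools import PackedSelection

selections = PackedSelection()

# Toy per-event boolean masks standing in for pass_trg, is4lWWZ, etc.
pass_trg = np.array([True, True, False, True])
is4l = np.array([True, False, True, True])
bmask_exactly0loose = np.array([True, True, True, False])

# Register named masks, then AND the requested ones together to get a category mask
selections.add("sr_demo", pass_trg & is4l & bmask_exactly0loose)
selections.add("all_events", np.ones_like(pass_trg))
sr_mask = selections.all("sr_demo")  # array([ True, False, False, False])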
2 changes: 1 addition & 1 deletion ewkcoffea/modules/objects_wwz.py
@@ -10,7 +10,7 @@

# Clean collection b (e.g. jets) with collection a (e.g. leps)
def get_cleaned_collection(obj_collection_a,obj_collection_b,drcut=0.4):
obj_b_nearest_to_any_in_a , dr = obj_collection_b.nearest(obj_collection_a,return_metric=True)
obj_a_nearest_to_any_in_b , dr = obj_collection_b.nearest(obj_collection_a,return_metric=True)
mask = ak.fill_none(dr>drcut,True)
return obj_collection_b[mask]

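For context, the helper touched above drops objects in collection b that lie within drcut of any object in collection a; only the name of the (unused) first return value from nearest() changes in this diff. A self-contained sketch of the behavior, assuming coffea's vector behaviors and using made-up kinematics, is:

import awkward as ak
from coffea.nanoevents.methods import vector

ak.behavior.update(vector.behavior)

def get_cleaned_collection(obj_collection_a, obj_collection_b, drcut=0.4):
    # For each object in b, find its nearest object in a and the dR to it,
    # then keep only the b objects with no a object within drcut
    _, dr = obj_collection_b.nearest(obj_collection_a, return_metric=True)
    mask = ak.fill_none(dr > drcut, True)
    return obj_collection_b[mask]

# One toy event: two leptons and three jets (values are illustrative)
leps = ak.zip(
    {"pt": [[40.0, 25.0]], "eta": [[0.1, -1.2]], "phi": [[0.5, 2.0]], "mass": [[0.0, 0.0]]},
    with_name="PtEtaPhiMLorentzVector",
)
jets = ak.zip(
    {"pt": [[60.0, 35.0, 30.0]], "eta": [[0.12, 1.5, -1.25]], "phi": [[0.52, -2.8, 2.05]], "mass": [[5.0, 5.0, 5.0]]},
    with_name="PtEtaPhiMLorentzVector",
)
cleaned_jets = get_cleaned_collection(leps, jets)  # keeps only the jet well separated from both leptons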
157 changes: 157 additions & 0 deletions ewkcoffea/modules/sample_groupings.py
@@ -0,0 +1,157 @@
# Dict to keep track of sample groupings, and associated tools


######################## Lists ########################

PROC_LST = ["WWZ","ZH","ZZ","ttZ","tWZ","WZ","other"]
SIG_LST = ["WWZ","ZH"]
BKG_LST = ["ZZ","ttZ","tWZ","WZ","other"]
CAT_LST_CB = ["sr_4l_sf_A", "sr_4l_sf_B", "sr_4l_sf_C", "sr_4l_of_1", "sr_4l_of_2", "sr_4l_of_3", "sr_4l_of_4"]


######################## Dictionaries ########################

# Map showing which CR categories are used to calculate NSFs for which SRs for which processes
BKG_TF_MAP = {

"ZZ" : {
"sr_4l_sf_A" : "cr_4l_sf",
"sr_4l_sf_B" : "cr_4l_sf",
"sr_4l_sf_C" : "cr_4l_sf",
"sr_4l_of_1" : "cr_4l_sf",
"sr_4l_of_2" : "cr_4l_sf",
"sr_4l_of_3" : "cr_4l_sf",
"sr_4l_of_4" : "cr_4l_sf",

},
"ttZ" : {
"sr_4l_sf_A" : "cr_4l_btag_sf_offZ_met80",
"sr_4l_sf_B" : "cr_4l_btag_sf_offZ_met80",
"sr_4l_sf_C" : "cr_4l_btag_sf_offZ_met80",
"sr_4l_of_1" : "cr_4l_btag_of",
"sr_4l_of_2" : "cr_4l_btag_of",
"sr_4l_of_3" : "cr_4l_btag_of",
"sr_4l_of_4" : "cr_4l_btag_of",
}
}
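For illustration (not part of this diff): the map is keyed by process, then by signal region, and the value names the control region from which that process's normalization scale factor (NSF) is derived. A hypothetical consumer, with made-up yields and a simple data-minus-other-backgrounds estimate (the actual calculation lives in make_datacards.py and may differ), could look like:

# Hypothetical per-category yields (numbers are illustrative only)
yields = {
    "cr_4l_sf": {"data": 400.0, "ZZ": 350.0, "ttZ": 5.0, "other": 20.0},
    "cr_4l_btag_sf_offZ_met80": {"data": 450.0, "ZZ": 10.0, "ttZ": 380.0, "other": 30.0},
}

def get_nsf(proc, sr, yields, tf_map=BKG_TF_MAP):
    # Find the CR that constrains this process for this SR, then scale the
    # process MC so it matches (data - all other MC) in that CR
    cr = tf_map[proc][sr]
    other_mc = sum(val for p, val in yields[cr].items() if p not in (proc, "data"))
    return (yields[cr]["data"] - other_mc) / yields[cr][proc]

nsf_zz = get_nsf("ZZ", "sr_4l_sf_A", yields)  # (400 - 25) / 350 ≈ 1.07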


# The "official" groupings
SAMPLE_DICT_BASE = {
"WWZ" : ["WWZJetsTo4L2Nu"],
"ZH" : ["GluGluZH","qqToZHToZTo2L"],

#"qqZZ": ["ZZTo4l"],
#"ggZZ": ["ggToZZTo2e2mu", "ggToZZTo2e2tau", "ggToZZTo2mu2tau", "ggToZZTo4e", "ggToZZTo4mu", "ggToZZTo4tau"],
"ZZ" : ["ZZTo4l", "ggToZZTo2e2mu", "ggToZZTo2e2tau", "ggToZZTo2mu2tau", "ggToZZTo4e", "ggToZZTo4mu", "ggToZZTo4tau"],

"ttZ" : [
"TTZToLL_M_1to10",
"TTZToLLNuNu_M_10",
"TTZToQQ",
],

"tWZ" : ["tWZ4l"], # Old sample tWll

"WZ" : ["WZTo3LNu"],

"other" : [

##"WWZJetsTo4L2Nu",
##"GluGluZH","qqToZHToZTo2L",
##"ZZTo4l", "ggToZZTo2e2mu", "ggToZZTo2e2tau", "ggToZZTo2mu2tau", "ggToZZTo4e", "ggToZZTo4mu", "ggToZZTo4tau",
##"TTZToLL_M_1to10","TTZToLLNuNu_M_10","TTZToQQ",
##"tWZ4l",

##"DYJetsToLL_M_10to50_MLM",
"DYJetsToLL_M_50_MLM",
"SSWW",
"ST_antitop_t-channel",
"ST_top_s-channel",
"ST_top_t-channel",
"tbarW_noFullHad",
"ttHnobb",
"TTTo2L2Nu",
"TTWJetsToLNu",
"TTWJetsToQQ",
"tW_noFullHad",
"tZq",
"VHnobb",
##"WJetsToLNu",
"WWTo2L2Nu",
#"WZTo3LNu", # Now by itself

"WWW",
"WZZ",
"ZZZ",
],
}

# Processes individually
SAMPLE_DICT_BASE_INDIV = {
"WWZJetsTo4L2Nu": ["WWZJetsTo4L2Nu"],
"GluGluZH": ["GluGluZH"],
"qqToZHToZTo2L": ["qqToZHToZTo2L"],
"ZZTo4l": ["ZZTo4l"],
"ggToZZTo2e2mu": ["ggToZZTo2e2mu"],
"ggToZZTo2e2tau": ["ggToZZTo2e2tau"],
"ggToZZTo2mu2tau": ["ggToZZTo2mu2tau"],
"ggToZZTo4e": ["ggToZZTo4e"],
"ggToZZTo4mu": ["ggToZZTo4mu"],
"ggToZZTo4tau": ["ggToZZTo4tau"],
"TTZToLL_M_1to10": ["TTZToLL_M_1to10"],
"TTZToLLNuNu_M_10": ["TTZToLLNuNu_M_10"],
"TTZToQQ": ["TTZToQQ"],
"tWZ4l" : ["tWZ4l"], # Old sample tWll

##"DYJetsToLL_M_10to50_MLM": ["DYJetsToLL_M_10to50_MLM"],
"DYJetsToLL_M_50_MLM": ["DYJetsToLL_M_50_MLM"],
"SSWW": ["SSWW"],
"ST_antitop_t-channel": ["ST_antitop_t-channel"],
"ST_top_s-channel": ["ST_top_s-channel"],
"ST_top_t-channel": ["ST_top_t-channel"],
"tbarW_noFullHad": ["tbarW_noFullHad"],
"ttHnobb": ["ttHnobb"],
"TTTo2L2Nu": ["TTTo2L2Nu"],
"TTWJetsToLNu": ["TTWJetsToLNu"],
"TTWJetsToQQ": ["TTWJetsToQQ"],
"tW_noFullHad": ["tW_noFullHad"],
"tZq": ["tZq"],
"VHnobb": ["VHnobb"],
##"WJetsToLNu": ["WJetsToLNu"],
"WWTo2L2Nu": ["WWTo2L2Nu"],
"WZTo3LNu": ["WZTo3LNu"],

"WWW" : ["WWW"],
"WZZ" : ["WZZ"],
"ZZZ" : ["ZZZ"],
}


######################## Tools ########################

# Pass dictionary with the base names for the samples, and return with full list for 4 years
def create_mc_sample_dict(in_dict,year):
out_dict = {}
if year == "all":
years = ["UL16APV","UL16","UL17","UL18"]
else:
years = [year]
for proc_group in in_dict.keys():
out_dict[proc_group] = []
for proc_base_name in in_dict[proc_group]:
for year_str in years:
out_dict[proc_group].append(f"{year_str}_{proc_base_name}")
#out_dict[proc_group].append(f"{proc_base_name}{year_str}") # TOP22006 format
return out_dict

# Get data sample dict
def create_data_sample_dict(year):
if year == "all":
grouping_data = {'data': ["UL16APV_data","UL16_data","UL17_data","UL18_data"]}
#grouping_data = {'data': ["dataUL16APV","dataUL16","dataUL17","dataUL18"]} # TOP22006 format
else:
grouping_data = {'data': [f"{year}_data"]}
#grouping_data = {'data': [f"data{year}"]} # TOP22006 format
return grouping_data
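Usage of the two helpers above (illustrative; the outputs follow directly from the code):

# Group a couple of MC processes across all four UL eras, plus the matching data grouping
mc_groups = create_mc_sample_dict({"WWZ": ["WWZJetsTo4L2Nu"], "ZH": ["GluGluZH", "qqToZHToZTo2L"]}, "all")
# mc_groups["WWZ"] == ["UL16APV_WWZJetsTo4L2Nu", "UL16_WWZJetsTo4L2Nu", "UL17_WWZJetsTo4L2Nu", "UL18_WWZJetsTo4L2Nu"]

data_groups = create_data_sample_dict("UL17")
# data_groups == {"data": ["UL17_data"]}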

