Merge pull request #44 from cmstas/run3_sample_update
Sample Update
kmohrman authored Sep 7, 2024
2 parents b03e434 + 9c03058 commit 8e32af8
Showing 766 changed files with 33,237 additions and 11,076 deletions.
12 changes: 6 additions & 6 deletions analysis/wwz/full_run2_run.sh
@@ -13,15 +13,15 @@
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/mc_sig_bkg_samples_hpg.cfg,../../input_samples/cfgs/wwz_analysis/data_samples_hpg.cfg -o wwz_histos_hpg --do-systs --hist-list njets njets_counts


### Examples with 4l skims configs ###
### Examples with 4l skims configs ### (ONLY WORKS AT UAF. SAMPLES HAVE NOT BEEN MOVED TO HPG!)

# Run at scale with futures
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim.cfg -x futures -n 200 -s 100000000 -o wwz_histos --hist-list bdt
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim.cfg -x futures -n 200 -s 100000000 -o wwz_histos --hist-list njets njets_counts --do-systs
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run2_v1.cfg -x futures -n 200 -s 100000000 -o wwz_histos --hist-list bdt
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run2_v1.cfg -x futures -n 200 -s 100000000 -o wwz_histos --hist-list njets njets_counts --do-systs

# Run with the siphon turned on (might want to comment out data in the input cfg)
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim.cfg -x futures -n 200 -o wwz_histos_siphon --siphon
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run2_v1.cfg -x futures -n 200 -o wwz_histos_siphon --siphon

# Run at scale with wq
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim.cfg -o wwz_histos_noSys --hist-list bdt
time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim.cfg -o wwz_histos_withSys --hist-list njets njets_counts --do-systs
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run2_v1.cfg -o wwz_histos_noSys --hist-list bdt
time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run2_v1.cfg -o wwz_histos_withSys --hist-list njets njets_counts --do-systs
28 changes: 22 additions & 6 deletions analysis/wwz/full_run3_run.sh
@@ -1,11 +1,27 @@
# Some example run commands for R3 configs
# ALL RUN SCRIPTS ARE TO BE RUN LOCALLY AT UAF!

# Run at scale (with futures) DO NOT RUN THIS ON LOGIN NODE AT UF!
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_run3.cfg -o wwz_run3_histos_nosyst -x futures -n 200
time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_run3.cfg -o wwz_run3_histos_withsyst -x futures -n 200 --hist-list njets njets_counts --do-systs
#################################################################################################################################
# Run at scale (with futures)
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run3_v1.cfg -o wwz_run3_histos -x futures -n 200
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run3_v1.cfg -o wwz_run3_histos_withsyst -x futures -n 200 --do-systs

# Run with the siphon on DO NOT RUN THIS ON LOGIN NODE AT UF!
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_run3.cfg -o wwz_run3_histos_siphon -x futures -n 200 --siphon
# Run with the siphon on
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run3_v1.cfg -o wwz_run3_histos_siphon -x futures -n 200 --siphon

# Run at scale (with wq)
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_run3.cfg -o wwz_run3_histos --hist-list few
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_run3_v1.cfg -o wwz_run3_histos --hist-list few
#################################################################################################################################

#Run at scale (with futures) for JUST 2022
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_2022_v1.cfg -o y22_wwz_histos_noSyst -x futures -n 200 --hist-list bdt
time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_2022_v1.cfg -o y22_wwz_histos_withSyst -x futures -n 200 --do-systs --hist-list njets njets_counts

# Run at scale (with futures) for JUST 2023
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_2023_v1.cfg -o y23_wwz_histos_noSyst -x futures -n 200

#Run at scale (with futures) for scaled 2022 (This is 2022 MC + 2022 & 2023 Data)
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4lskim_scaled_run3.cfg -o y22scaled_wwz_histos_noSyst -x futures -n 200

#Run at scale (with futures) for Run 2 + Run 3 (This is scaled 2022 as explained above)
#time python run_wwz4l.py ../../input_samples/cfgs/wwz_analysis/samples_4l_run2run3.cfg -o wwz_run2run3_histos -x futures -n 200
8 changes: 4 additions & 4 deletions analysis/wwz/get_wwz_yields.py
@@ -626,12 +626,12 @@ def make_sr_comb_plot(histo_dict,grouping_mc,grouping_data,year,ana_type="cb"):
if ana_type == "cb":
sr_lst = sg.CAT_LST_CB
hist_label = "Cut-based SRs"
y_max = 4 # 9 is good for R2, 4 is good for R3
y_max = 8 # 9 is good for R2, 4 is good for R3
fig_size = (12,7)
elif ana_type == "bdt":
sr_lst = sg.CAT_LST_BDT
hist_label = "BDT-based SRs"
y_max = 20
y_max = 35
fig_size = (24,7)
if year == "run3":
sr_lst = sg.CAT_LST_BDT_COARSE
@@ -857,7 +857,7 @@ def main():
parser.add_argument("-o", "--output-path", default="plots", help = "The path the output files should be saved to")
parser.add_argument('-y', "--get-yields", action='store_true', help = "Get yields from the pkl file")
parser.add_argument('-p', "--make-plots", action='store_true', help = "Make plots from the pkl file")
parser.add_argument('-u', "--ul-year", default='run2', help = "Which year to process", choices=["all","run2","run3","UL16APV","UL16","UL17","UL18","2022","2022EE"])
parser.add_argument('-u', "--ul-year", default='run2', help = "Which year to process", choices=["all","run2","run3","y22","y23","UL16APV","UL16","UL17","UL18","2022","2022EE","2023","2023BPix"])
args = parser.parse_args()

# Get the counts from the input histo
@@ -896,7 +896,7 @@ def main():

# Get the ref dict, for the relevant year
if args.ul_year in ["run2","UL18","UL17","UL16","UL16APV"]: ref_ylds = ref_dict=yd.EWK_REF
if args.ul_year in ["run3","2022","2022EE"]: ref_ylds = ref_dict=yd.EWK_REF_2022
if args.ul_year in ["run3","y22","y23","2022","2022EE","2023","2023BPix"]: ref_ylds = ref_dict=yd.EWK_REF_2022

# Dump latex table for summary of CB, BDT, CRs
hlines = [2,5] # Just summary categories
66 changes: 60 additions & 6 deletions analysis/wwz/make_datacards.py
@@ -32,6 +32,20 @@
"btagSFbc_uncorrelated_2022EE" : {"yr_rel":"2022EE", "yr_notrel": ["2022"]},
}

SYSTS_SPECIAL_ALL = {
"btagSFlight_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18", "2022", "2022EE"]},
"btagSFbc_uncorrelated_2016APV" : {"yr_rel":"UL16APV", "yr_notrel": ["UL16", "UL17", "UL18", "2022", "2022EE"]},
"btagSFlight_uncorrelated_2016" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18", "2022", "2022EE"]},
"btagSFbc_uncorrelated_2016" : {"yr_rel":"UL16", "yr_notrel": ["UL16APV", "UL17", "UL18", "2022", "2022EE"]},
"btagSFlight_uncorrelated_2017" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18", "2022", "2022EE"]},
"btagSFbc_uncorrelated_2017" : {"yr_rel":"UL17", "yr_notrel": ["UL16APV", "UL16", "UL18", "2022", "2022EE"]},
"btagSFlight_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17", "2022", "2022EE"]},
"btagSFbc_uncorrelated_2018" : {"yr_rel":"UL18", "yr_notrel": ["UL16APV", "UL16", "UL17", "2022", "2022EE"]},

"btagSFbc_uncorrelated_2022" : {"yr_rel":"2022", "yr_notrel": ["UL16APV","UL16","UL17","UL18","2022EE"]},
"btagSFbc_uncorrelated_2022EE" : {"yr_rel":"2022EE", "yr_notrel": ["UL16APV","UL16","UL17","UL18","2022"]},
}

# Hard code the rateParam lines to put at the end of the card (for background normalization)
RATE_PARAM_LINES = [
"ZZ_norm rateParam * ZZ 1 [0,5]",
@@ -146,7 +160,9 @@ def make_ch_card(ch,proc_order,ch_ylds,ch_kappas=None,ch_gmn=None,extra_lines=No
# - So this function adds the nominal yields from the other three years to the up/down variation for the relevant year
# - Note the in_dict is modified in place (we do not return a copy of the dict)
def handle_per_year_systs_for_fr(in_dict,year):
if year in ["2022","2022EE","run3"]:
if year == "all":
systs_special=SYSTS_SPECIAL_ALL
if year in ["2022","2022EE","run3","y22","y23"]:
systs_special=SYSTS_SPECIAL_RUN3
if year in ["UL16","UL16APV","UL17","UL18","run2"]:
systs_special=SYSTS_SPECIAL_RUN2
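To make the padding described in the comment above concrete, here is a minimal sketch (not part of the diff) of the idea behind handle_per_year_systs_for_fr: for a systematic that is only relevant in one year, the other years contribute just their nominal yields to the combined up/down variation. The (year, cat, syst, proc) nesting follows a comment later in this diff; the "nominal" key and the Up/Down suffixes are illustrative assumptions, not necessarily the repository's actual keys.

```python
# A minimal sketch (not part of the diff) of the per-year padding described above.
# Key names ("nominal", syst+"Up"/"Down") are illustrative assumptions.
def pad_decorrelated_syst(ylds, systs_special, cat, proc):
    """Build combined up/down yields for systs that apply to only one year."""
    combined = {}
    for syst, yrs in systs_special.items():
        up = ylds[yrs["yr_rel"]][cat][syst + "Up"][proc]
        down = ylds[yrs["yr_rel"]][cat][syst + "Down"][proc]
        for yr in yrs["yr_notrel"]:
            nom = ylds[yr][cat]["nominal"][proc]  # years where the syst is not relevant
            up += nom                             # contribute only their nominal yield
            down += nom
        combined[syst] = (up, down)
    return combined
```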
@@ -220,6 +236,24 @@ def get_rate_systs(proc_lst):

return out_dict

# Determines whether the up and down variations of a systematic are on the same side of the nominal
def determine_updo_same(nom,up,down):
    if nom < 0:
        raise Exception("Negative values should have been fixed by this point!")
    elif ((up > nom) and (down > nom)):
        return True
    elif ((up < nom) and (down < nom)):
        return True
    else: return False

# Fixes the case where the up and down variations are on the same side of the nominal by taking the larger variation and symmetrizing it
def fix_updown_same(nom,up,down):
    diff_1 = abs(nom - up)
    diff_2 = abs(nom - down)
    diff = max(diff_1,diff_2)
    kappa_up = (nom + diff)/nom
    kappa_down = (nom - diff)/nom
    return kappa_up, kappa_down

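A plausible way the two helpers above could be combined when building kappas (the actual call site is not shown in this hunk, and the numbers are made up for illustration):

```python
# Illustrative usage of determine_updo_same / fix_updown_same (not part of the diff)
nom, up, down = 10.0, 12.0, 11.0  # one-sided case: both variations above nominal
if determine_updo_same(nom, up, down):
    kappa_up, kappa_down = fix_updown_same(nom, up, down)  # symmetrized -> (1.2, 0.8)
else:
    kappa_up, kappa_down = up / nom, down / nom  # usual up/nom and down/nom ratios
print(kappa_up, kappa_down)
```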
# Get kappa dict (e.g. up/nom ratios) from the dict of all histograms
def get_kappa_dict(in_dict_mc,in_dict_data):
@@ -251,7 +285,8 @@ def get_syst_base_name_lst(in_lst):
valvar_kappa_do = yt.valvar_op(valvar_do,valvar_nom,"div")

# Handle negative cases
if (valvar_kappa_up[0]<=0) and (valvar_kappa_do[0]<=0): raise Exception("Both kappas negative, should not be possible.")
if (valvar_kappa_up[0]<=0) and (valvar_kappa_do[0]<=0):
raise Exception(f"Both Kappas Neagtive for process: {proc}, category: {cat}, systematic: {sys}")
if valvar_kappa_up[0] <= 0:
print(f"WARNING: Up var for {sys} for {proc} for {cat} is negative, setting to {SMALL}.")
valvar_kappa_up[0] = SMALL
@@ -371,7 +406,7 @@ def main():
parser.add_argument("--do-tf",action="store_true",help="Do the TF data-driven background estimation")
parser.add_argument("--bdt",action="store_true",help="Use BDT SR bins")
parser.add_argument("--unblind",action="store_true",help="If set, use real data, otherwise use asimov data")
parser.add_argument('-u', "--run", default='run2', help = "Which years to process", choices=["run2","run3"])
parser.add_argument('-u', "--run", default='run2', help = "Which years to process", choices=["all","run2","run3","y22","y23"])

args = parser.parse_args()
in_file = args.in_file_name
@@ -394,6 +429,15 @@ def main():
# Get the dictionary defining the mc sample grouping
sample_names_dict_data = {"FR" : sg.create_data_sample_dict(run)}
sample_names_dict_mc = {"FR" : sg.create_mc_sample_dict(run)}
if run == "all":
sample_names_dict_mc["UL16APV"] = sg.create_mc_sample_dict("UL16APV")
sample_names_dict_mc["UL16"] = sg.create_mc_sample_dict("UL16")
sample_names_dict_mc["UL17"] = sg.create_mc_sample_dict("UL17")
sample_names_dict_mc["UL18"] = sg.create_mc_sample_dict("UL18")
sample_names_dict_mc["2022"] = sg.create_mc_sample_dict("2022")
sample_names_dict_mc["2022EE"] = sg.create_mc_sample_dict("2022EE")
#sample_names_dict_mc["2023"] = sg.create_mc_sample_dict("2023")
#sample_names_dict_mc["2023BPix"] = sg.create_mc_sample_dict("2023BPix")
if run == "run2":
sample_names_dict_mc["UL16APV"] = sg.create_mc_sample_dict("UL16APV")
sample_names_dict_mc["UL16"] = sg.create_mc_sample_dict("UL16")
@@ -402,6 +446,14 @@
if run == "run3":
sample_names_dict_mc["2022"] = sg.create_mc_sample_dict("2022")
sample_names_dict_mc["2022EE"] = sg.create_mc_sample_dict("2022EE")
#sample_names_dict_mc["2023"] = sg.create_mc_sample_dict("2023")
#sample_names_dict_mc["2023BPix"] = sg.create_mc_sample_dict("2023BPix")
if run == "y22":
sample_names_dict_mc["2022"] = sg.create_mc_sample_dict("2022")
sample_names_dict_mc["2022EE"] = sg.create_mc_sample_dict("2022EE")
if run == "y23":
sample_names_dict_mc["2023"] = sg.create_mc_sample_dict("2023")
sample_names_dict_mc["2023BPix"] = sg.create_mc_sample_dict("2023BPix")

# Get yield dictionary (nested in the order: year,cat,syst,proc)
yld_dict_mc_allyears = {}
@@ -410,7 +462,6 @@
if do_nuis:
handle_per_year_systs_for_fr(yld_dict_mc_allyears,run)

# We're only looking at Full R2 (run2) or 2022 (run3) for now
yld_dict_mc = yld_dict_mc_allyears["FR"]
yld_dict_data = yt.get_yields(histo,sample_names_dict_data["FR"])

@@ -461,11 +512,14 @@ def main():
cat_lst_cr = ["cr_4l_btag_of_1b", "cr_4l_btag_of_2b", "cr_4l_btag_of_3b", "cr_4l_btag_sf_offZ_met80_1b", "cr_4l_btag_sf_offZ_met80_2b", "cr_4l_btag_sf_offZ_met80_3b","cr_4l_sf"]
cat_lst_sr = sg.CAT_LST_CB
if use_bdt_sr:
if run == "run2":
if run == "all":
cat_lst_sr = sg.CAT_LST_BDT
elif run in ["run2"]:
cat_lst_sr = sg.CAT_LST_BDT
elif run == "run3":
elif run in ["run3", "y22", "y23"]:
cat_lst_sr = sg.CAT_LST_BDT_COARSE
else:
print(run)
raise Exception("Unknown year")
cat_lst = cat_lst_sr + cat_lst_cr
print(f"\nMaking cards for {cat_lst}. \nPutting in {out_dir}.")