From 00adab9a1ca20f3a2d36ca3b0b9236ff1bdfd7b4 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Fri, 17 Mar 2023 17:36:36 +0100
Subject: [PATCH 01/43] sort: met filters

---
 bucoffea/vbfhinv/vbfhinvProcessor.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/bucoffea/vbfhinv/vbfhinvProcessor.py b/bucoffea/vbfhinv/vbfhinvProcessor.py
index 77f4c6ddb..0c6e5a2f4 100644
--- a/bucoffea/vbfhinv/vbfhinvProcessor.py
+++ b/bucoffea/vbfhinv/vbfhinvProcessor.py
@@ -81,10 +81,6 @@ def trigger_selection(selection, df, cfg):
     pass_none = ~pass_all
     dataset = df['dataset']
 
-    if df['is_data']:
-        selection.add('filt_met', mask_and(df, cfg.FILTERS.DATA))
-    else:
-        selection.add('filt_met', mask_and(df, cfg.FILTERS.MC))
     selection.add('trig_met', mask_or(df, cfg.TRIGGERS.MET))
 
     # Electron trigger overlap
@@ -121,6 +117,11 @@ def trigger_selection(selection, df, cfg):
     # Muon trigger
     selection.add('trig_mu', mask_or(df, cfg.TRIGGERS.MUON.SINGLE))
 
+    if df['is_data']:
+        selection.add('filt_met', mask_and(df, cfg.FILTERS.DATA))
+    else:
+        selection.add('filt_met', mask_and(df, cfg.FILTERS.MC))
+    
     return selection
 
 class vbfhinvProcessor(processor.ProcessorABC):

From b0cb61459551f376c22a727a7ac98f89c6c9319e Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Fri, 17 Mar 2023 17:46:16 +0100
Subject: [PATCH 02/43] gather object definitions

---
 bucoffea/vbfhinv/vbfhinvProcessor.py | 65 ++++++++++++++--------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/bucoffea/vbfhinv/vbfhinvProcessor.py b/bucoffea/vbfhinv/vbfhinvProcessor.py
index 0c6e5a2f4..9ef52d0fd 100644
--- a/bucoffea/vbfhinv/vbfhinvProcessor.py
+++ b/bucoffea/vbfhinv/vbfhinvProcessor.py
@@ -211,8 +211,8 @@ def process(self, df):
         ak4 = ak4[ak4.puid]
 
         # Recalculate MET pt and phi based on npv-corrections
+        met_pt_uncorr, met_phi_uncorr = met_pt, met_phi
         if cfg.MET.XYCORR:
-            met_pt_uncorr, met_phi_uncorr = met_pt, met_phi
             met_pt, met_phi = met_xy_correction(df, met_pt, met_phi)
 
         # Muons
@@ -221,6 +221,7 @@ def process(self, df):
                       & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                       & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)
 
+        leadmuon_index=muons.pt.argmax()
         dimuons = muons.distincts()
         dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']
 
@@ -231,6 +232,7 @@ def process(self, df):
                             & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                             & (electrons.absetasc < cfg.ELECTRON.CUTS.TIGHT.ETA)
 
+        leadelectron_index=electrons.pt.argmax()
         dielectrons = electrons.distincts()
         dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']
 
@@ -244,6 +246,10 @@ def process(self, df):
         muonjet_pairs = ak4[:,:1].cross(muons)
         df['dRMuonJet'] = np.hypot(muonjet_pairs.i0.eta-muonjet_pairs.i1.eta , dphi(muonjet_pairs.i0.phi,muonjet_pairs.i1.phi)).min()
 
+        # photons
+        leadphoton_index=photons.pt.argmax()
+        df['is_tight_photon'] = photons.mediumId & photons.barrel
+
         # Recoil
         df['recoil_pt_uncorr'], df['recoil_phi_uncorr'] = recoil(met_pt_uncorr, met_phi_uncorr, electrons, muons, photons)
         df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons)
@@ -256,6 +262,31 @@ def process(self, df):
 
         df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=5.0)
         df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=5.0)
+
+        jets_for_cut = ak4[(ak4.pt > cfg.RUN.HF_PT_THRESH) & (ak4.abseta > 2.99) & (ak4.abseta < 5.0)]
+
+        # We will only consider jets that are back to back with MET i.e. dPhi(jet,MET) > 2.5
+        dphi_hfjet_met = dphi(jets_for_cut.phi, df['recoil_phi'])
+        dphimask = dphi_hfjet_met > 2.5
+        jets_for_cut = jets_for_cut[dphimask]
+
+        seta_minus_phi_alljets = jets_for_cut.setaeta - jets_for_cut.sphiphi
+
+        # Cut away the low sigma eta & phi corner (< 0.02)
+        setaphi_corner_cut = ~((jets_for_cut.setaeta < 0.02) & (jets_for_cut.sphiphi < 0.02))
+        # Sigma eta - phi < 0.02 requirement
+        setaphi_diff_cut_alljets = (seta_minus_phi_alljets < 0.02)
+
+        # For jets with |eta| > 4, we have a different requirement
+        setaphi_cut_higheta = (jets_for_cut.setaeta < 0.1) & (jets_for_cut.sphiphi > 0.02)
+
+        is_high_eta_jet = jets_for_cut.abseta > 4.0
+        setaphi_cut_alljets = (is_high_eta_jet * setaphi_cut_higheta + ~is_high_eta_jet * (setaphi_corner_cut & setaphi_diff_cut_alljets)).all()
+
+        stripsize_cut_alljets = (jets_for_cut.hfcentralstripsize < 3).all()
+
+        fail_hf_cuts = (~setaphi_cut_alljets) | (~stripsize_cut_alljets)
+        
         selection = processor.PackedSelection()
 
         # Triggers
@@ -294,6 +325,7 @@ def process(self, df):
 
         # AK4 dijet
         diak4 = ak4[:,:2].distincts()
+        leadak4_clean = diak4.i0.pt * np.cosh(diak4.i0.eta) < 6500
         leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
         trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
         hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
@@ -337,30 +369,6 @@ def get_more_forward_jeteta(diak4):
 
         # Sigma eta & phi cut (only for v8 samples because we have the info there)
         if cfg.RUN.ULEGACYV8:
-            jets_for_cut = ak4[(ak4.pt > cfg.RUN.HF_PT_THRESH) & (ak4.abseta > 2.99) & (ak4.abseta < 5.0)]
-
-            # We will only consider jets that are back to back with MET i.e. dPhi(jet,MET) > 2.5
-            dphi_hfjet_met = dphi(jets_for_cut.phi, df['recoil_phi'])
-            dphimask = dphi_hfjet_met > 2.5
-            jets_for_cut = jets_for_cut[dphimask]
-
-            seta_minus_phi_alljets = jets_for_cut.setaeta - jets_for_cut.sphiphi
-
-            # Cut away the low sigma eta & phi corner (< 0.02)
-            setaphi_corner_cut = ~((jets_for_cut.setaeta < 0.02) & (jets_for_cut.sphiphi < 0.02))
-            # Sigma eta - phi < 0.02 requirement
-            setaphi_diff_cut_alljets = (seta_minus_phi_alljets < 0.02)
-
-            # For jets with |eta| > 4, we have a different requirement
-            setaphi_cut_higheta = (jets_for_cut.setaeta < 0.1) & (jets_for_cut.sphiphi > 0.02)
-
-            is_high_eta_jet = jets_for_cut.abseta > 4.0
-            setaphi_cut_alljets = (is_high_eta_jet * setaphi_cut_higheta + ~is_high_eta_jet * (setaphi_corner_cut & setaphi_diff_cut_alljets)).all()
-            
-            stripsize_cut_alljets = (jets_for_cut.hfcentralstripsize < 3).all()
-
-            fail_hf_cuts = (~setaphi_cut_alljets) | (~stripsize_cut_alljets)
-            
             selection.add('sigma_eta_minus_phi', setaphi_cut_alljets)
             selection.add('central_stripsize_cut', stripsize_cut_alljets)
             selection.add('fail_hf_cuts', fail_hf_cuts)
@@ -390,7 +398,6 @@ def get_more_forward_jeteta(diak4):
         dphitkpf = dphi(met_phi, df['TkMET_phi'])
 
         # Reject events where the leading jet has momentum > 6.5 TeV
-        leadak4_clean = diak4.i0.pt * np.cosh(diak4.i0.eta) < 6500
         selection.add('leadak4_clean', leadak4_clean.any())
 
         # Divide into three categories for trigger study
@@ -411,7 +418,6 @@ def get_more_forward_jeteta(diak4):
             selection.add('one_fifth_mask', pass_all)
 
         # Dimuon CR
-        leadmuon_index=muons.pt.argmax()
         selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
         selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                     & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
@@ -423,7 +429,6 @@ def get_more_forward_jeteta(diak4):
         selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)
 
         # Diele CR
-        leadelectron_index=electrons.pt.argmax()
 
         selection.add('one_electron', electrons.counts==1)
         selection.add('two_electrons', electrons.counts==2)
@@ -438,10 +443,6 @@ def get_more_forward_jeteta(diak4):
         selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)
 
         # Photon CR
-        leadphoton_index=photons.pt.argmax()
-
-        df['is_tight_photon'] = photons.mediumId & photons.barrel
-
         selection.add('one_photon', photons.counts==1)
         selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
         selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)

From f4f0b377b126ed3f80f26f20b45cb316d4166245 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Fri, 17 Mar 2023 17:54:07 +0100
Subject: [PATCH 03/43] gather selection definitions

---
 bucoffea/vbfhinv/vbfhinvProcessor.py | 112 +++++++++++++--------------
 1 file changed, 54 insertions(+), 58 deletions(-)

diff --git a/bucoffea/vbfhinv/vbfhinvProcessor.py b/bucoffea/vbfhinv/vbfhinvProcessor.py
index 9ef52d0fd..805bc242f 100644
--- a/bucoffea/vbfhinv/vbfhinvProcessor.py
+++ b/bucoffea/vbfhinv/vbfhinvProcessor.py
@@ -238,9 +238,6 @@ def process(self, df):
 
         df['MT_el'] = ((electrons.counts==1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()
 
-        # ak4
-        leadak4_index=ak4.pt.argmax()
-
         elejet_pairs = ak4[:,:1].cross(electrons)
         df['dREleJet'] = np.hypot(elejet_pairs.i0.eta-elejet_pairs.i1.eta , dphi(elejet_pairs.i0.phi,elejet_pairs.i1.phi)).min()
         muonjet_pairs = ak4[:,:1].cross(muons)
@@ -263,6 +260,9 @@ def process(self, df):
         df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=5.0)
         df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=5.0)
 
+        # ak4
+        leadak4_index=ak4.pt.argmax()
+
         jets_for_cut = ak4[(ak4.pt > cfg.RUN.HF_PT_THRESH) & (ak4.abseta > 2.99) & (ak4.abseta < 5.0)]
 
         # We will only consider jets that are back to back with MET i.e. dPhi(jet,MET) > 2.5
@@ -286,42 +286,6 @@ def process(self, df):
         stripsize_cut_alljets = (jets_for_cut.hfcentralstripsize < 3).all()
 
         fail_hf_cuts = (~setaphi_cut_alljets) | (~stripsize_cut_alljets)
-        
-        selection = processor.PackedSelection()
-
-        # Triggers
-        pass_all = np.ones(df.size)==1
-        selection.add('inclusive', pass_all)
-        selection = trigger_selection(selection, df, cfg)
-
-        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)
-
-        # Common selection
-        selection.add('veto_ele', electrons.counts==0)
-        selection.add('veto_muo', muons.counts==0)
-        selection.add('veto_photon', photons.counts==0)
-        selection.add('veto_tau', taus.counts==0)
-        selection.add('at_least_one_tau', taus.counts>0)
-        selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
-        selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
-
-        # Inverted min DPhi(j,met) cut for QCD CR
-        selection.add('mindphijr_inv', df['minDPhiJetRecoil'] <= cfg.SELECTION.SIGNAL.MINDPHIJR)
-
-        # B jets are treated using veto weights
-        # So accept them in MC, but reject in data
-        if df['is_data']:
-            selection.add('veto_b', bjets.counts==0)
-        else:
-            selection.add('veto_b', pass_all)
-
-        selection.add('dpfcalo_sr',np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO)
-        selection.add('dpfcalo_cr',np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO)
-
-        selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL)
-        selection.add('met_sr', met_pt>cfg.SELECTION.SIGNAL.RECOIL)
-
-        selection.add('calo_metptnolep', df['CaloRecoil_pt'] > 200)
 
         # AK4 dijet
         diak4 = ak4[:,:2].distincts()
@@ -353,19 +317,53 @@ def get_more_forward_jeteta(diak4):
         df['htmiss'] = ak4[ak4.pt>30].p4.sum().pt
         df['ht'] = ak4[ak4.pt>30].pt.sum()
 
+        selection = processor.PackedSelection()
+
+        # Triggers
+        pass_all = np.ones(df.size)==1
+        selection.add('inclusive', pass_all)
+        selection = trigger_selection(selection, df, cfg)
+
+        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)
+
+        # Common selection
+        selection.add('veto_ele', electrons.counts==0)
+        selection.add('veto_muo', muons.counts==0)
+        selection.add('veto_photon', photons.counts==0)
+        selection.add('veto_tau', taus.counts==0)
+        selection.add('at_least_one_tau', taus.counts>0)
+        # B jets are treated using veto weights
+        # So accept them in MC, but reject in data
+        if df['is_data']:
+            selection.add('veto_b', bjets.counts==0)
+        else:
+            selection.add('veto_b', pass_all)
+        
+        selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
+        selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
+
+        # Inverted min DPhi(j,met) cut for QCD CR
+        selection.add('mindphijr_inv', df['minDPhiJetRecoil'] <= cfg.SELECTION.SIGNAL.MINDPHIJR)
+
+        selection.add('dpfcalo_sr',np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO)
+        selection.add('dpfcalo_cr',np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO)
+
+        selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL)
+        selection.add('met_sr', met_pt>cfg.SELECTION.SIGNAL.RECOIL)
+
+        selection.add('calo_metptnolep', df['CaloRecoil_pt'] > 200)
+
         # HEM mask for 2018 data
         # For MC, we're reweighting events with the fraction of good lumi, 
         # so the cut is defined as pass_all here
+        metphihem_mask = pass_all
+        no_el_in_hem_mask = pass_all
         if df['year'] == 2018:
+            no_el_in_hem_mask = electrons[electrons_in_hem(electrons)].counts==0
             if df['is_data']:
                 metphihem_mask = ~((met_phi > -1.8) & (met_phi < -0.6) & (df['run'] > 319077))
-            else:
-                metphihem_mask = pass_all
-            selection.add("metphihemextveto", metphihem_mask)
-            selection.add('no_el_in_hem', electrons[electrons_in_hem(electrons)].counts==0)
-        else:
-            selection.add("metphihemextveto", pass_all)
-            selection.add('no_el_in_hem', pass_all)
+        selection.add("metphihemextveto", metphihem_mask)
+        selection.add('no_el_in_hem', no_el_in_hem_mask)
 
         # Sigma eta & phi cut (only for v8 samples because we have the info there)
         if cfg.RUN.ULEGACYV8:
@@ -381,12 +379,15 @@ def get_more_forward_jeteta(diak4):
         selection.add('two_jets', diak4.counts>0)
         selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
         selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
-        selection.add('hemisphere', hemisphere)
         selection.add('leadak4_id',leadak4_id.any())
         selection.add('trailak4_id',trailak4_id.any())
+        selection.add('hemisphere', hemisphere)
         selection.add('mjj', df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
         selection.add('dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
         selection.add('detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)
+        
+        # Reject events where the leading jet has momentum > 6.5 TeV
+        selection.add('leadak4_clean', leadak4_clean.any())
 
         # Tighter detajj cut for ML studies
         selection.add('detajj_gt_3p0', df['detajj'] > 3.0)
@@ -397,9 +398,6 @@ def get_more_forward_jeteta(diak4):
         vec_dphi = calculate_vecDPhi(ak4, met_pt, met_phi, df['TkMET_phi'])
         dphitkpf = dphi(met_phi, df['TkMET_phi'])
 
-        # Reject events where the leading jet has momentum > 6.5 TeV
-        selection.add('leadak4_clean', leadak4_clean.any())
-
         # Divide into three categories for trigger study
         if cfg.RUN.TRIGGER_STUDY:
             two_central_jets = (np.abs(diak4.i0.eta) <= 2.5) & (np.abs(diak4.i1.eta) <= 2.5)
@@ -419,32 +417,30 @@ def get_more_forward_jeteta(diak4):
 
         # Dimuon CR
         selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
+        selection.add('two_muons', muons.counts==2)
+        selection.add('dimuon_charge', (dimuon_charge==0).any())
         selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                     & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
-        selection.add('dimuon_charge', (dimuon_charge==0).any())
-        selection.add('two_muons', muons.counts==2)
 
         # Single muon CR
         selection.add('one_muon', muons.counts==1)
         selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)
 
         # Diele CR
-
-        selection.add('one_electron', electrons.counts==1)
-        selection.add('two_electrons', electrons.counts==2)
         selection.add('at_least_one_tight_el', df['is_tight_electron'].any())
-
+        selection.add('two_electrons', electrons.counts==2)
+        selection.add('dielectron_charge', (dielectron_charge==0).any())
         selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                         & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
-        selection.add('dielectron_charge', (dielectron_charge==0).any())
 
         # Single Ele CR
+        selection.add('one_electron', electrons.counts==1)
         selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
         selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)
 
         # Photon CR
-        selection.add('one_photon', photons.counts==1)
         selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
+        selection.add('one_photon', photons.counts==1)
         selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
         selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)
 

From 92425e4c5e444920d0cc95bb770820a2e45dd12e Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Fri, 17 Mar 2023 17:58:04 +0100
Subject: [PATCH 04/43] removed unused selections

---
 bucoffea/vbfhinv/vbfhinvProcessor.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/bucoffea/vbfhinv/vbfhinvProcessor.py b/bucoffea/vbfhinv/vbfhinvProcessor.py
index 805bc242f..ee48054a3 100644
--- a/bucoffea/vbfhinv/vbfhinvProcessor.py
+++ b/bucoffea/vbfhinv/vbfhinvProcessor.py
@@ -201,11 +201,6 @@ def process(self, df):
         # Set up ParticleNet
         pfcands = load_pf_cands(df,[muons,electrons])
         session = load_particlenet_model(bucoffea_path("particlenet_models/model_ops12.onnx"))
-        
-        # Remove jets in accordance with the noise recipe
-        if not cfg.RUN.ULEGACYV8 and df['year'] == 2017:
-            ak4   = ak4[(ak4.ptraw>50) | (ak4.abseta<2.65) | (ak4.abseta>3.139)]
-            bjets = bjets[(bjets.ptraw>50) | (bjets.abseta<2.65) | (bjets.abseta>3.139)]
 
         # Filtering ak4 jets according to pileup ID
         ak4 = ak4[ak4.puid]
@@ -331,7 +326,7 @@ def get_more_forward_jeteta(diak4):
         selection.add('veto_muo', muons.counts==0)
         selection.add('veto_photon', photons.counts==0)
         selection.add('veto_tau', taus.counts==0)
-        selection.add('at_least_one_tau', taus.counts>0)
+        
         # B jets are treated using veto weights
         # So accept them in MC, but reject in data
         if df['is_data']:
@@ -365,16 +360,9 @@ def get_more_forward_jeteta(diak4):
         selection.add("metphihemextveto", metphihem_mask)
         selection.add('no_el_in_hem', no_el_in_hem_mask)
 
-        # Sigma eta & phi cut (only for v8 samples because we have the info there)
-        if cfg.RUN.ULEGACYV8:
-            selection.add('sigma_eta_minus_phi', setaphi_cut_alljets)
-            selection.add('central_stripsize_cut', stripsize_cut_alljets)
-            selection.add('fail_hf_cuts', fail_hf_cuts)
-
-        else:
-            selection.add('sigma_eta_minus_phi', pass_all)
-            selection.add('central_stripsize_cut', pass_all)
-            selection.add('fail_hf_cuts', pass_all)
+        selection.add('sigma_eta_minus_phi', setaphi_cut_alljets)
+        selection.add('central_stripsize_cut', stripsize_cut_alljets)
+        selection.add('fail_hf_cuts', fail_hf_cuts)
 
         selection.add('two_jets', diak4.counts>0)
         selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
@@ -385,7 +373,7 @@ def get_more_forward_jeteta(diak4):
         selection.add('mjj', df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
         selection.add('dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
         selection.add('detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)
-        
+
         # Reject events where the leading jet has momentum > 6.5 TeV
         selection.add('leadak4_clean', leadak4_clean.any())
 

From 6dc169e3911ae48bc9756711c4de078dffc589cf Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Fri, 17 Mar 2023 18:17:48 +0100
Subject: [PATCH 05/43] remove ULEGACYV8 variable

---
 bucoffea/config/vbfhinv.yaml         |  1 -
 bucoffea/helpers/weights.py          | 33 ++++++++++------------------
 bucoffea/vbfhinv/vbfhinvProcessor.py | 16 ++++++--------
 3 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/bucoffea/config/vbfhinv.yaml b/bucoffea/config/vbfhinv.yaml
index 1bdbd538c..850355a43 100644
--- a/bucoffea/config/vbfhinv.yaml
+++ b/bucoffea/config/vbfhinv.yaml
@@ -222,7 +222,6 @@ default:
 
   run:
     sync: False
-    ulegacyv8: True
     one_fifth_unblind: False
     tight_mjj_cut: False
     qcd_estimation: True
diff --git a/bucoffea/helpers/weights.py b/bucoffea/helpers/weights.py
index d68c3751e..3c112f34b 100644
--- a/bucoffea/helpers/weights.py
+++ b/bucoffea/helpers/weights.py
@@ -67,31 +67,22 @@ def varied_weight(sfname, *args):
 
 
         ### Electrons (For UL: Both 2017 and 2018 have their SFs split by electron pt)
-        if extract_year(df['dataset']) == 2017 or cfg.RUN.ULEGACYV8:
-            high_et = electrons.pt>20
+        high_et = electrons.pt>20
 
-            # Low pt SFs
-            low_pt_args = (electrons.etasc[~high_et], electrons.pt[~high_et])
-            ele_reco_sf_low = varied_weight('ele_reco_pt_lt_20', *low_pt_args)
-            ele_id_sf_low = varied_weight("ele_id_loose", *low_pt_args)
+        # Low pt SFs
+        low_pt_args = (electrons.etasc[~high_et], electrons.pt[~high_et])
+        ele_reco_sf_low = varied_weight('ele_reco_pt_lt_20', *low_pt_args)
+        ele_id_sf_low = varied_weight("ele_id_loose", *low_pt_args)
 
-            # High pt SFs
-            high_pt_args = (electrons.etasc[high_et], electrons.pt[high_et])
+        # High pt SFs
+        high_pt_args = (electrons.etasc[high_et], electrons.pt[high_et])
 
-            ele_reco_sf_high = varied_weight("ele_reco", *high_pt_args)
-            ele_id_sf_high = varied_weight("ele_id_loose", *high_pt_args)
-
-            # Combine
-            veto_weight_ele = (1 - ele_reco_sf_low*ele_id_sf_low).prod() * (1-ele_reco_sf_high*ele_id_sf_high).prod()
-        else:
-            # No split for 2018
-            args = (electrons.etasc, electrons.pt)
-            ele_reco_sf = varied_weight("ele_reco", *args)
-            ele_id_sf = varied_weight("ele_id_loose", *args)
-
-            # Combine
-            veto_weight_ele = (1 - ele_id_sf*ele_reco_sf).prod()
+        ele_reco_sf_high = varied_weight("ele_reco", *high_pt_args)
+        ele_id_sf_high = varied_weight("ele_id_loose", *high_pt_args)
 
+        # Combine
+        veto_weight_ele = (1 - ele_reco_sf_low*ele_id_sf_low).prod() * (1-ele_reco_sf_high*ele_id_sf_high).prod()
+        
         # Gen-checking for electrons
         if cfg.ELECTRON.GENCHECK:
             veto_weight_ele = gen_check_for_leptons(electrons, veto_weight_ele)
diff --git a/bucoffea/vbfhinv/vbfhinvProcessor.py b/bucoffea/vbfhinv/vbfhinvProcessor.py
index ee48054a3..af5cdf432 100644
--- a/bucoffea/vbfhinv/vbfhinvProcessor.py
+++ b/bucoffea/vbfhinv/vbfhinvProcessor.py
@@ -635,10 +635,9 @@ def ewk_correction(a, b):
                     output['tree_float16'][region]["leadak4_chf"]       +=  processor.column_accumulator(np.float16(diak4.i0.chf[mask]))
                     output['tree_float16'][region]["leadak4_cef"]       +=  processor.column_accumulator(np.float16(diak4.i0.cef[mask]))
 
-                    if cfg.RUN.ULEGACYV8:
-                        output['tree_float16'][region]["leadak4_setaeta"]   +=  processor.column_accumulator(np.float16(diak4.i0.setaeta[mask]))
-                        output['tree_float16'][region]["leadak4_sphiphi"]   +=  processor.column_accumulator(np.float16(diak4.i0.sphiphi[mask]))
-                        output['tree_float16'][region]["leadak4_cssize"]    +=  processor.column_accumulator(np.float16(diak4.i0.hfcentralstripsize[mask]))
+                    output['tree_float16'][region]["leadak4_setaeta"]   +=  processor.column_accumulator(np.float16(diak4.i0.setaeta[mask]))
+                    output['tree_float16'][region]["leadak4_sphiphi"]   +=  processor.column_accumulator(np.float16(diak4.i0.sphiphi[mask]))
+                    output['tree_float16'][region]["leadak4_cssize"]    +=  processor.column_accumulator(np.float16(diak4.i0.hfcentralstripsize[mask]))
                 
                     output['tree_float16'][region]["trailak4_pt"]        +=  processor.column_accumulator(np.float16(diak4.i1.pt[mask]))
                     output['tree_float16'][region]["trailak4_eta"]       +=  processor.column_accumulator(np.float16(diak4.i1.eta[mask]))
@@ -648,10 +647,9 @@ def ewk_correction(a, b):
                     output['tree_float16'][region]["trailak4_chf"]       +=  processor.column_accumulator(np.float16(diak4.i1.chf[mask]))
                     output['tree_float16'][region]["trailak4_cef"]       +=  processor.column_accumulator(np.float16(diak4.i1.cef[mask]))
 
-                    if cfg.RUN.ULEGACYV8:
-                        output['tree_float16'][region]["trailak4_setaeta"]   +=  processor.column_accumulator(np.float16(diak4.i1.setaeta[mask]))
-                        output['tree_float16'][region]["trailak4_sphiphi"]   +=  processor.column_accumulator(np.float16(diak4.i1.sphiphi[mask]))
-                        output['tree_float16'][region]["trailak4_cssize"]    +=  processor.column_accumulator(np.float16(diak4.i1.hfcentralstripsize[mask]))
+                    output['tree_float16'][region]["trailak4_setaeta"]   +=  processor.column_accumulator(np.float16(diak4.i1.setaeta[mask]))
+                    output['tree_float16'][region]["trailak4_sphiphi"]   +=  processor.column_accumulator(np.float16(diak4.i1.sphiphi[mask]))
+                    output['tree_float16'][region]["trailak4_cssize"]    +=  processor.column_accumulator(np.float16(diak4.i1.hfcentralstripsize[mask]))
 
                     output['tree_float16'][region]["mjj"]               +=  processor.column_accumulator(np.float16(df["mjj"][mask]))
                     output['tree_float16'][region]["detajj"]            +=  processor.column_accumulator(np.float16(df["detajj"][mask]))
@@ -815,7 +813,7 @@ def ezfill(name, **kwargs):
             ezfill('ak4_central_eta',    jeteta=get_more_central_jeteta(diak4)[mask].flatten(),    weight=w_diak4)
             ezfill('ak4_forward_eta',    jeteta=get_more_forward_jeteta(diak4)[mask].flatten(),    weight=w_diak4)
 
-            if cfg.RUN.ULEGACYV8 and cfg.RUN.SAVE_HF_VARIABLES:
+            if cfg.RUN.SAVE_HF_VARIABLES:
                 def is_hf_jet(_ak4, ptmin=80, etamin=2.9, etamax=5.0):
                     return (_ak4.pt > ptmin) & (_ak4.abseta > etamin) & (_ak4.abseta < etamax)
 

From 3270f3e232c188ffed1952ebbeb04b35c04daa37 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Fri, 17 Mar 2023 21:46:34 +0100
Subject: [PATCH 06/43] remove ULEGACYV8 variable: bug fix

---
 bucoffea/monojet/definitions.py | 39 ++++++++++-----------------------
 1 file changed, 12 insertions(+), 27 deletions(-)

diff --git a/bucoffea/monojet/definitions.py b/bucoffea/monojet/definitions.py
index 520e3f52a..11fddf6f1 100644
--- a/bucoffea/monojet/definitions.py
+++ b/bucoffea/monojet/definitions.py
@@ -438,16 +438,14 @@ def setup_candidates(df, cfg):
         hadflav= 0*df['Jet_pt'] if df['is_data'] else df['Jet_hadronFlavour'],
     )
 
-    # Only fur UL v8 samples, the new HF shape variables
-    if cfg.RUN.ULEGACYV8:
-        kwargs = {
-            'setaeta': df['Jet_hfsigmaEtaEta'],
-            'sphiphi': df['Jet_hfsigmaPhiPhi'],
-            'hfcentralstripsize': df['Jet_hfcentralEtaStripSize'],
-            'hfadjacentstripsize': df['Jet_hfadjacentEtaStripsSize'],
-            'btagdf': df['Jet_btagDeepFlavQG'],
-        }
-        ak4.add_attributes(**kwargs)
+    kwargs = {
+        'setaeta': df['Jet_hfsigmaEtaEta'],
+        'sphiphi': df['Jet_hfsigmaPhiPhi'],
+        'hfcentralstripsize': df['Jet_hfcentralEtaStripSize'],
+        'hfadjacentstripsize': df['Jet_hfadjacentEtaStripsSize'],
+        'btagdf': df['Jet_btagDeepFlavQG'],
+    }
+    ak4.add_attributes(**kwargs)
 
     if not df['is_data']:
         ak4.add_attributes(jercorr=df['Jet_corr_JER'])
@@ -487,15 +485,7 @@ def setup_candidates(df, cfg):
     if cfg.OVERLAP.AK4.PHOTON.CLEAN:
         ak4 = ak4[object_overlap(ak4, photons, dr=cfg.OVERLAP.AK4.PHOTON.DR)]
 
-    # No EE v2 fix in UL
-    if cfg.RUN.ULEGACYV8:
-        met_branch = 'MET'
-    else:
-        if extract_year(df['dataset']) == 2017:
-            met_branch = 'METFixEE2017'
-        else:
-            met_branch = 'MET'
-
+    met_branch = 'MET'
     met_pt = df[f'{met_branch}_pt{jes_suffix_met}']
     met_phi = df[f'{met_branch}_phi{jes_suffix_met}']
     
@@ -952,14 +942,9 @@ def candidate_weights(weights, df, evaluator, muons, electrons, photons, cfg):
     # Electron ID and reco
     # Function of eta, pT (Other way round relative to muons!)
 
-    # For 2017 and 2018 (both years in UL), the reco SF is split below/above 20 GeV
-    if cfg.RUN.ULEGACYV8 or extract_year(df['dataset']) == 2017:
-        high_et = electrons.pt>20
-        ele_reco_sf = evaluator['ele_reco'](electrons.etasc[high_et], electrons.pt[high_et])
-        ele_reco_sf_low_pt = evaluator['ele_reco_pt_lt_20'](electrons.etasc[~high_et], electrons.pt[~high_et])
-
-    else:
-        ele_reco_sf = evaluator['ele_reco'](electrons.etasc, electrons.pt)
+    high_et = electrons.pt>20
+    ele_reco_sf = evaluator['ele_reco'](electrons.etasc[high_et], electrons.pt[high_et])
+    ele_reco_sf_low_pt = evaluator['ele_reco_pt_lt_20'](electrons.etasc[~high_et], electrons.pt[~high_et])
     
     if cfg.ELECTRON.GENCHECK:
         ele_reco_sf = gen_match_check_leptons(electrons[high_et], ele_reco_sf).prod()

From ef4be254113916676269d7a1173c03a8ca4d1c33 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Fri, 17 Mar 2023 22:07:39 +0100
Subject: [PATCH 07/43] sort region definitions + make list creation uniform

---
 bucoffea/vbfhinv/definitions.py | 201 +++++++++-----------------------
 1 file changed, 57 insertions(+), 144 deletions(-)

diff --git a/bucoffea/vbfhinv/definitions.py b/bucoffea/vbfhinv/definitions.py
index dcf89f5a0..84b81c628 100644
--- a/bucoffea/vbfhinv/definitions.py
+++ b/bucoffea/vbfhinv/definitions.py
@@ -273,24 +273,31 @@ def vbfhinv_accumulator(cfg):
     return  processor.dict_accumulator(items)
 
 def vbfhinv_regions(cfg):
+    # 'inclusive'    # 'veto_b',
+    def clean_lists(orig_list, to_remove):
+        return list(filter(lambda x: x not in to_remove, orig_list))
+    
+    def add_lists(orig_list, to_add):
+        return list(orig_list+ to_add)
+    
     common_cuts = [
+        'filt_met',
         'veto_ele',
         'veto_muo',
-        'filt_met',
+        'veto_photon',
+        'veto_tau',
+        'veto_b',
         'mindphijr',
         'recoil',
         'two_jets',
         'leadak4_pt_eta',
-        'leadak4_id',
         'trailak4_pt_eta',
+        'leadak4_id',
         'trailak4_id',
         'hemisphere',
         'mjj',
         'dphijj',
         'detajj',
-        'veto_photon',
-        'veto_tau',
-        'veto_b',
         'leadak4_clean'
     ]
 
@@ -326,7 +333,7 @@ def vbfhinv_regions(cfg):
         regions['sr_vbf'].remove('eemitigation')
 
     if cfg.RUN.REGION_WITHOUT_DIJET_CUTS:
-        regions['sr_vbf_nodijetcut'] = [cut for cut in regions['sr_vbf'] if cut not in ['mjj','detajj','dphijj']]
+        regions['sr_vbf_nodijetcut'] = clean_lists(regions['sr_vbf'], ['mjj','detajj','dphijj'])
 
     # SR without PU weights
     # regions['sr_vbf_no_pu'] = copy.deepcopy(regions['sr_vbf'])
@@ -334,180 +341,86 @@ def vbfhinv_regions(cfg):
 
     # SR without HEM veto
     if cfg.RUN.HEMCHECK:
-        regions['sr_vbf_no_hem_veto'] = copy.deepcopy(regions['sr_vbf'])
-        regions['sr_vbf_no_hem_veto'].remove('metphihemextveto')
+        regions['sr_vbf_no_hem_veto'] = clean_lists(regions['sr_vbf'], ['metphihemextveto'])
 
     # QCD CR with the HF shape cuts inverted
     if cfg.RUN.QCD_ESTIMATION:
-        regions['cr_vbf_qcd'] = copy.deepcopy(regions['sr_vbf'])
+        to_remove = ['central_stripsize_cut', 'sigma_eta_minus_phi']
+        regions['cr_vbf_qcd'] = clean_lists(regions['sr_vbf'], to_remove)
         if 'one_fifth_mask' in regions['cr_vbf_qcd']:
             regions['cr_vbf_qcd'].remove('one_fifth_mask')
-        try:
-            regions['cr_vbf_qcd'].remove('central_stripsize_cut')
-            regions['cr_vbf_qcd'].remove('sigma_eta_minus_phi')
-        except:
-            pass
         regions['cr_vbf_qcd'].append('fail_hf_cuts')
 
     # QCD CR to check with deltaphi(jet,MET) cut inverted
     # Will be used to compare the yields with the QCD template obtained from R&S
     if cfg.RUN.REBSMEAR_CHECK:
-        regions['cr_vbf_qcd_rs'] = copy.deepcopy(regions['sr_vbf'])
-        regions['cr_vbf_qcd_rs'].remove('mindphijr')
-        regions['cr_vbf_qcd_rs'].append('mindphijr_inv')
-
-    # For sync mode
-    if cfg and cfg.RUN.SYNC:
-        regions['cr_sync'] = [
-            'trig_met',
-            'veto_photon',
-            'mindphijr',
-            'recoil',
-            'two_jets',
-            'leadak4_pt_eta',
-            'leadak4_id',
-            'trailak4_pt_eta',
-            'trailak4_id',
-            'hemisphere',
-            'mjj',
-            'dphijj',
-            'detajj'
-        ]
-
+        regions['cr_vbf_qcd_rs'] = clean_lists(regions['sr_vbf'], ['mindphijr'])
+        regions['cr_vbf_qcd_rs'] = add_lists(regions['cr_vbf_qcd_rs'], ['mindphijr_inv'])
+    
     # Dimuon CR
-    cr_2m_cuts = ['trig_met','two_muons', 'at_least_one_tight_mu', 'dimuon_mass', 'veto_ele', 'dimuon_charge'] + common_cuts[1:] + ['dpfcalo_cr']
-
-    cr_2m_cuts.remove('veto_muo')
-
-    regions['cr_2m_vbf'] = cr_2m_cuts
-
+    to_add = ['trig_met', 'at_least_one_tight_mu', 'two_muons', 'dimuon_charge', 'dimuon_mass', 'dpfcalo_cr']
+    to_remove = ['veto_muo']
+    regions['cr_2m_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
+    
     # Single muon CR
-    cr_1m_cuts = ['trig_met','one_muon', 'at_least_one_tight_mu',  'veto_ele'] + common_cuts[1:] + ['dpfcalo_cr']
-    cr_1m_cuts.remove('veto_muo')
-    regions['cr_1m_vbf'] = cr_1m_cuts
+    to_add = ['trig_met', 'at_least_one_tight_mu', 'one_muon', 'dpfcalo_cr']
+    to_remove = ['veto_muo']
+    regions['cr_1m_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
 
     # Dielectron CR
-    cr_2e_cuts = ['trig_ele','two_electrons', 'at_least_one_tight_el', 'dielectron_mass', 'veto_muo', 'dielectron_charge'] + common_cuts[2:] + ['dpfcalo_cr']
-    # cr_2e_cuts.remove('veto_ele')
-    regions['cr_2e_vbf'] = cr_2e_cuts
+    to_add = ['trig_ele', 'at_least_one_tight_el', 'two_electrons', 'dielectron_charge', 'dielectron_mass', 'dpfcalo_cr']
+    to_remove = ['veto_ele']
+    regions['cr_2e_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
+    
+    # Single electron CR
+    to_add = ['trig_ele', 'at_least_one_tight_el', 'one_electron', 'met_el', 'no_el_in_hem', 'dpfcalo_cr']
+    to_remove = ['veto_ele']
+    regions['cr_1e_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
+
+    # Photon CR
+    to_add = ['trig_photon', 'at_least_one_tight_photon', 'one_photon', 'photon_pt', 'dpfcalo_cr']
+    to_remove = ['veto_photon']
+    regions['cr_g_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
 
     # Z CRs with CaloMETNoLep cut
     if cfg.RUN.CALOMET_CHECK:
         for r in ['cr_2e_vbf', 'cr_2m_vbf']:
-            regions[f'{r}_calocut'] = copy.deepcopy(regions[r])
-            regions[f'{r}_calocut'].append('calo_metptnolep')
-
-    # Single electron CR
-    cr_1e_cuts = ['trig_ele','one_electron', 'at_least_one_tight_el', 'veto_muo','met_el'] + common_cuts[1:] + ['dpfcalo_cr', 'no_el_in_hem']
-    # cr_1e_cuts.remove('veto_ele')
-    regions['cr_1e_vbf'] =  cr_1e_cuts
-
-    # Photon CR
-    cr_g_cuts = ['trig_photon', 'one_photon', 'at_least_one_tight_photon','photon_pt'] + common_cuts + ['dpfcalo_cr']
-    cr_g_cuts.remove('veto_photon')
-
-    regions['cr_g_vbf'] = cr_g_cuts
-
-    if cfg and cfg.RUN.SYNC:
-        regions['sync_sr_vbf_round1'] = [
-                                        'filt_met',
-                                        'trig_met',
-                                        'veto_photon',
-                                        'mindphijr',
-                                        'recoil',
-                                        'two_jets',
-                                        'leadak4_pt_eta',
-                                        'leadak4_id',
-                                        'trailak4_pt_eta',
-                                        'trailak4_id',
-                                        'hemisphere',
-                                        'mjj',
-                                        'dphijj',
-                                        'detajj',
-                                        ]
+            regions[f'{r}_calocut'] = add_lists(regions[r], ['calo_metptnolep'])
 
     # VBF signal region where the hard-lepton vetoes are replace
     # with lepton veto weights
-    tmp = {}
-    for region in regions.keys():
-        if not region.startswith("sr_"):
-            continue
-        new_region = f"{region}_no_veto_all"
-        tmp[new_region] = copy.deepcopy(regions[region])
-        tmp[new_region].remove("veto_muo")
-        tmp[new_region].remove("veto_tau")
-        tmp[new_region].remove("veto_ele")
-        tmp[new_region].remove("mindphijr")
-        tmp[new_region].remove("recoil")
-        tmp[new_region].append("met_sr")
-        tmp[new_region].append("mindphijm")
-
-    regions.update(tmp)
+    to_add = ['met_sr', 'mindphijm']
+    to_remove = ['veto_muo', 'veto_tau', 'veto_ele', 'mindphijr', 'recoil']
+    regions.update(dict([(f"{region}_no_veto_all", add_lists(clean_lists(regions[region], to_remove),to_add)) for region in regions.keys() if region.startswith("sr_")]))
 
     # Region with high detajj cut
     if "sr_vbf_detajj_gt_3p0" in cfg.RUN.EXTRA_REGIONS:
-        regions['sr_vbf_detajj_gt_3p0'] = copy.deepcopy(regions['sr_vbf_no_veto_all'])
-        regions['sr_vbf_detajj_gt_3p0'].append('detajj_gt_3p0')
+        regions['sr_vbf_detajj_gt_3p0'] = add_lists(regions['sr_vbf_no_veto_all'], ['detajj_gt_3p0'])
 
     # VBF signal region without the dphijj cut
     if "sr_vbf_no_dphijj_cut" in cfg.RUN.EXTRA_REGIONS:
-        regions['sr_vbf_no_dphijj_cut'] = copy.deepcopy(regions['sr_vbf_no_veto_all'])
-        regions['sr_vbf_no_dphijj_cut'].remove('dphijj')
+        regions['sr_vbf_no_dphijj_cut'] = clean_lists(regions['sr_vbf_no_veto_all'], ['dphijj'])
 
-    if cfg and cfg.RUN.TRIGGER_STUDY:
+    if cfg.RUN.TRIGGER_STUDY:
         # Trigger studies
         # num = numerator, den = denominator
         # Single Mu region: Remove mjj cut, add SingleMu trigger, toggle MET trigger
-        tr_1m_num_cuts = copy.deepcopy(cr_1m_cuts)
-        tr_1m_num_cuts.remove('recoil')
-        tr_1m_num_cuts.append('trig_mu')
-        tr_1m_num_cuts.append('mu_pt_trig_safe')
-
-        regions['tr_1m_num_two_central_jets'] = tr_1m_num_cuts + ['two_central_jets']
-        regions['tr_1m_num_one_jet_forward_one_jet_central'] = tr_1m_num_cuts + ['one_jet_forward_one_jet_central']
-        regions['tr_1m_num_two_hf_jets'] = tr_1m_num_cuts + ['two_hf_jets']
-
-        tr_1m_den_cuts = copy.deepcopy(tr_1m_num_cuts)
-        tr_1m_den_cuts.remove('trig_met')
-
-        regions['tr_1m_den_two_central_jets'] = tr_1m_den_cuts + ['two_central_jets']
-        regions['tr_1m_den_one_jet_forward_one_jet_central'] = tr_1m_den_cuts + ['one_jet_forward_one_jet_central']
-        regions['tr_1m_den_two_hf_jets'] = tr_1m_den_cuts + ['two_hf_jets']
-
-        # Double Mu region: Remove mjj cut, toggle MET trigger
-        tr_2m_num_cuts = copy.deepcopy(cr_2m_cuts)
-        tr_2m_num_cuts.remove('mjj')
-        tr_2m_num_cuts.append('trig_mu')
-        tr_2m_num_cuts.append('mu_pt_trig_safe')
-
-        regions['tr_2m_num_two_central_jets'] = tr_2m_num_cuts + ['two_central_jets']
-        regions['tr_2m_num_one_jet_forward_one_jet_central'] = tr_2m_num_cuts + ['one_jet_forward_one_jet_central']
-        regions['tr_2m_num_two_hf_jets'] = tr_2m_num_cuts + ['two_hf_jets']
-
-        tr_2m_den_cuts = copy.deepcopy(tr_2m_num_cuts)
-        tr_2m_den_cuts.remove('trig_met')
-
-        regions['tr_2m_den_two_central_jets'] = tr_2m_den_cuts + ['two_central_jets']
-        regions['tr_2m_den_one_jet_forward_one_jet_central'] = tr_2m_den_cuts + ['one_jet_forward_one_jet_central']
-        regions['tr_2m_den_two_hf_jets'] = tr_2m_den_cuts + ['two_hf_jets']
-
-        # Photon region
-        tr_g_num_cuts = copy.deepcopy(cr_g_cuts)
-        tr_g_num_cuts.remove('recoil')
-        tr_g_num_cuts.remove('photon_pt')
+        for cut in ['two_central_jets', 'one_jet_forward_one_jet_central', 'two_hf_jets']:
+            regions[f"tr_1m_num_{cut}"] = add_lists(clean_lists(regions['cr_1m_vbf'], ['recoil']), ['trig_mu', 'mu_pt_trig_safe', cut])
+            regions[f"tr_1m_den_{cut}"] = clean_lists(regions[f"tr_1m_num_{cut}"], ['trig_met'])
 
-        tr_g_den_cuts = copy.deepcopy(tr_g_num_cuts)
-        tr_g_den_cuts.remove('trig_photon')
+            regions[f"tr_2m_num_{cut}"] = add_lists(clean_lists(regions['cr_2m_vbf'], ['mjj']), ['trig_mu', 'mu_pt_trig_safe', cut])
+            regions[f"tr_2m_den_{cut}"] = clean_lists(regions[f"tr_2m_num_{cut}"], ['trig_met'])
 
-        regions[f'tr_g_notrig_num'] = copy.deepcopy(tr_g_num_cuts)
-        regions[f'tr_g_notrig_den'] = copy.deepcopy(tr_g_den_cuts)
+        regions[f"tr_g_notrig_num"] = clean_lists(regions['cr_g_vbf'], ['recoil', 'photon_pt'])
+        regions[f"tr_g_notrig_den"] = clean_lists(regions[f"tr_g_notrig_num"], ['trig_photon'])
 
         for trgname in cfg.TRIGGERS.HT.GAMMAEFF:
-            regions[f'tr_g_{trgname}_num'] = tr_g_num_cuts + [trgname]
-            regions[f'tr_g_{trgname}_den'] = tr_g_den_cuts + [trgname]
+            regions[f'tr_g_{trgname}_num'] = add_lists(regions[f"tr_g_notrig_num"], [trgname])
+            regions[f'tr_g_{trgname}_den'] = add_lists(regions[f"tr_g_notrig_den"], [trgname])
 
-            regions[f'tr_g_{trgname}_photon_pt_trig_cut_num'] = tr_g_num_cuts + [trgname, 'photon_pt_trig']
-            regions[f'tr_g_{trgname}_photon_pt_trig_cut_den'] = tr_g_den_cuts + [trgname, 'photon_pt_trig']
+            regions[f'tr_g_{trgname}_photon_pt_trig_cut_num'] = add_lists(regions[f"tr_g_notrig_num"], [trgname, 'photon_pt_trig'])
+            regions[f'tr_g_{trgname}_photon_pt_trig_cut_den'] = add_lists(regions[f"tr_g_notrig_den"], [trgname, 'photon_pt_trig'])
 
     return regions
 

From 218ee2e38c3306512c941919dce3502a7ee3e93a Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Fri, 17 Mar 2023 22:11:30 +0100
Subject: [PATCH 08/43] fix config file: remove unused variables + add new

---
 bucoffea/config/vbfhinv.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bucoffea/config/vbfhinv.yaml b/bucoffea/config/vbfhinv.yaml
index 850355a43..29b4febc4 100644
--- a/bucoffea/config/vbfhinv.yaml
+++ b/bucoffea/config/vbfhinv.yaml
@@ -135,7 +135,7 @@ default:
         clean: false
         dr: 0.4
   mitigation:
-    hem: false
+    hem: True
 
   sf:
     qcd_ew_nlo_w: # QCD x EW k factor for QCD Z
@@ -221,7 +221,6 @@ default:
       use_average: False
 
   run:
-    sync: False
     one_fifth_unblind: False
     tight_mjj_cut: False
     qcd_estimation: True
@@ -232,7 +231,7 @@ default:
     regionregex: .*
     apply_hf_cuts: True
     hf_pt_thresh: 80
-    region_without_dijet_cuts: False
+    region_without_dijet_cuts: True
     apply_weights:
       hfmask: False
       endcap: False
@@ -242,6 +241,7 @@ default:
       jet_images: False
       tree_regions:
         - sr_vbf_no_veto_all
+        - sr_vbf
     kinematics:
       save: False
       events:

From 4a7040472c64d1a593b44c30c63a51cb12f7327b Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Sat, 18 Mar 2023 14:22:49 +0100
Subject: [PATCH 09/43] Update lumi json files

---
 ...TeV_ReReco_07Aug2017_Collisions16_JSON.txt |   1 -
 ...TeV_EOY2017ReReco_Collisions17_JSON_v1.txt |   1 -
 ...2_13TeV_UL2017_Collisions17_GoldenJSON.txt | 615 +++++++++++++++++
 ...co2018ABC_PromptEraD_Collisions18_JSON.txt |   1 -
 ...175_13TeV_Legacy2018_Collisions18_JSON.txt | 648 ++++++++++++++++++
 5 files changed, 1263 insertions(+), 3 deletions(-)
 delete mode 100644 bucoffea/data/json/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt
 delete mode 100644 bucoffea/data/json/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt
 create mode 100644 bucoffea/data/json/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt
 delete mode 100644 bucoffea/data/json/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt
 create mode 100644 bucoffea/data/json/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt

diff --git a/bucoffea/data/json/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt b/bucoffea/data/json/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt
deleted file mode 100644
index 46391645a..000000000
--- a/bucoffea/data/json/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt
+++ /dev/null
@@ -1 +0,0 @@
-{"273158": [[1, 1279]], "273302": [[1, 459]], "273402": [[100, 292]], "273403": [[1, 53]], "273404": [[1, 18]], "273405": [[2, 25]], "273406": [[1, 112]], "273408": [[1, 6]], "273409": [[1, 309]], "273410": [[1, 90]], "273411": [[1, 29]], "273425": [[62, 352], [354, 733]], "273446": [[1, 33]], "273447": [[1, 113], [115, 412]], "273448": [[1, 391]], "273449": [[1, 214]], "273450": [[1, 214], [219, 647]], "273492": [[71, 71], [73, 282], [284, 325], [327, 338]], "273493": [[1, 233]], "273494": [[1, 192]], "273502": [[73, 256], [258, 318], [320, 813], [815, 1064]], "273503": [[1, 598]], "273554": [[77, 437]], "273555": [[1, 173]], "273725": [[83, 252], [254, 2545]], "273728": [[1, 100]], "273730": [[1, 1814], [1820, 2126]], "274094": [[108, 332]], "274146": [[1, 67]], "274157": [[105, 534]], "274159": [[1, 43]], "274160": [[1, 207]], "274161": [[1, 516]], "274172": [[31, 95]], "274198": [[81, 191]], "274199": [[1, 623]], "274200": [[1, 678]], "274240": [[1, 40], [42, 82]], "274241": [[1, 1152], [1161, 1176]], "274244": [[1, 607]], "274250": [[1, 701]], "274251": [[1, 546]], "274283": [[2, 19]], "274284": [[1, 210]], "274286": [[1, 154]], "274314": [[97, 97], [99, 158]], "274315": [[1, 424]], "274316": [[1, 959]], "274317": [[1, 3]], "274319": [[1, 225]], "274335": [[60, 1003]], "274336": [[1, 14]], "274337": [[3, 17]], "274338": [[1, 698]], "274339": [[1, 29], [31, 31], [33, 33], [35, 93]], "274344": [[1, 632]], "274345": [[1, 170]], "274382": [[94, 144]], "274387": [[88, 439]], "274388": [[1, 1820]], "274420": [[94, 268]], "274421": [[1, 342]], "274422": [[1, 2207]], "274440": [[92, 493]], "274441": [[1, 431]], "274442": [[1, 752]], "274954": [[37, 37], [39, 57]], "274955": [[1, 91]], "274968": [[1, 1192]], "274969": [[1, 1003]], "274970": [[1, 47]], "274971": [[1, 905]], "274998": [[64, 782]], "274999": [[1, 1241]], "275000": [[1, 136]], "275001": [[1, 1781], [1786, 2061]], "275059": [[78, 81], [105, 137]], "275066": [[1, 96]], "275067": [[1, 392]], "275068": [[1, 
915]], "275073": [[1, 517]], "275074": [[1, 442], [444, 647]], "275124": [[106, 106], [108, 431]], "275125": [[1, 989]], "275282": [[91, 180]], "275283": [[1, 132]], "275284": [[1, 74]], "275290": [[96, 143]], "275291": [[1, 347]], "275292": [[1, 121]], "275293": [[1, 142], [144, 201]], "275309": [[55, 617]], "275310": [[1, 1929]], "275311": [[1, 1253]], "275319": [[141, 282]], "275337": [[1, 427]], "275338": [[1, 520]], "275344": [[76, 356]], "275345": [[1, 353]], "275370": [[81, 365]], "275371": [[1, 22], [28, 569]], "275375": [[127, 1449]], "275376": [[1, 2667], [2669, 3096]], "275657": [[1, 105]], "275658": [[1, 337]], "275659": [[1, 17]], "275761": [[1, 9]], "275767": [[1, 4]], "275772": [[1, 56]], "275773": [[1, 7]], "275774": [[1, 311], [315, 315]], "275776": [[1, 140]], "275777": [[1, 300]], "275778": [[1, 305]], "275782": [[1, 131], [133, 762]], "275832": [[1, 367]], "275833": [[1, 53], [56, 115], [117, 251]], "275834": [[1, 297]], "275835": [[1, 13]], "275836": [[1, 429], [431, 1163], [1166, 1170], [1184, 1293]], "275837": [[1, 186], [198, 726]], "275847": [[1, 2263]], "275886": [[73, 109]], "275890": [[1, 1393]], "275911": [[62, 298], [300, 354], [356, 440]], "275912": [[1, 289]], "275913": [[1, 475]], "275918": [[1, 318], [348, 361]], "275920": [[5, 463]], "275921": [[1, 2], [4, 5], [17, 20]], "275923": [[3, 53], [63, 64], [66, 126]], "275931": [[1, 14], [19, 89]], "275963": [[82, 139], [141, 172]], "276092": [[74, 149]], "276097": [[1, 507]], "276242": [[1, 7], [18, 61], [72, 1664]], "276243": [[1, 15], [18, 480], [482, 611]], "276244": [[3, 1202]], "276282": [[75, 534], [537, 1142]], "276283": [[3, 1087]], "276315": [[40, 175], [178, 217]], "276317": [[3, 138]], "276318": [[3, 103], [106, 570]], "276355": [[1, 33]], "276361": [[1, 161], [169, 208], [210, 800], [802, 833]], "276363": [[1, 140], [142, 238], [242, 1482]], "276384": [[2, 1117]], "276437": [[63, 224], [227, 1074], [1076, 2190]], "276454": [[1, 527]], "276458": [[1, 341]], "276495": [[87, 
268]], "276501": [[4, 221], [223, 2547]], "276502": [[2, 741]], "276525": [[88, 469], [471, 1606], [1626, 2893]], "276527": [[1, 214]], "276528": [[4, 394]], "276542": [[74, 857]], "276543": [[1, 638], [643, 952]], "276544": [[2, 161]], "276545": [[2, 110], [117, 213]], "276581": [[79, 444]], "276582": [[1, 871]], "276583": [[1, 52]], "276584": [[1, 2]], "276585": [[1, 238], [241, 242], [245, 246]], "276586": [[2, 658], [680, 773]], "276587": [[1, 1006]], "276653": [[72, 550]], "276655": [[1, 593], [595, 1106]], "276659": [[1, 127], [129, 252]], "276775": [[96, 1260]], "276776": [[1, 1823]], "276794": [[1, 885]], "276807": [[66, 220]], "276808": [[1, 875]], "276810": [[1, 287]], "276811": [[1, 1270], [1272, 2563]], "276831": [[64, 755], [761, 2702]], "276834": [[1, 720]], "276870": [[78, 1354], [1356, 3108], [3111, 3258], [3260, 3484]], "276935": [[79, 184], [186, 838], [842, 906]], "276940": [[70, 213]], "276946": [[1, 27]], "276947": [[1, 89], [91, 126], [135, 141]], "276948": [[1, 474]], "276950": [[1, 2353]], "277069": [[81, 265], [267, 390]], "277070": [[1, 309], [311, 1059]], "277071": [[1, 82], [90, 178]], "277072": [[1, 253], [256, 466]], "277073": [[1, 90]], "277076": [[1, 3], [5, 7], [9, 35], [38, 1037]], "277087": [[204, 1191]], "277094": [[1, 161], [164, 584]], "277096": [[1, 1309], [1311, 2086]], "277112": [[1, 155]], "277126": [[42, 59]], "277127": [[1, 438], [440, 902]], "277148": [[83, 190], [193, 700]], "277166": [[77, 186], [188, 431]], "277168": [[1, 1708], [1711, 1822], [1824, 2223]], "277180": [[88, 228]], "277194": [[113, 139], [144, 497], [500, 1115], [1117, 1312], [1320, 1749], [1754, 2067], [2070, 2070]], "277305": [[62, 744]], "277420": [[84, 84], [86, 291], [293, 346]], "277981": [[82, 83], [85, 163]], "277991": [[1, 98]], "277992": [[1, 260], [262, 312]], "278017": [[77, 97], [99, 213], [215, 512], [514, 589]], "278018": [[1, 263], [265, 422], [424, 615], [617, 627], [642, 1011], [1020, 1181]], "278167": [[87, 394], [397, 1153], [1155, 
1660], [1662, 1707], [1709, 2258]], "278175": [[1, 88]], "278193": [[77, 231]], "278239": [[76, 339], [341, 558], [560, 740]], "278240": [[1, 64], [70, 113], [115, 1121], [1123, 1296], [1299, 1309]], "278273": [[75, 110]], "278274": [[1, 18], [20, 85]], "278288": [[67, 81]], "278289": [[1, 42], [44, 52]], "278290": [[1, 11]], "278308": [[87, 216], [219, 587], [589, 680], [683, 1200], [1217, 1410], [1413, 1848], [1880, 1880]], "278310": [[1, 32], [34, 709]], "278315": [[73, 254], [256, 661], [663, 767]], "278345": [[84, 500], [503, 831]], "278346": [[1, 117]], "278349": [[1, 401], [403, 612], [632, 633]], "278366": [[1, 453]], "278406": [[85, 360], [362, 1682]], "278509": [[91, 1557]], "278769": [[75, 104]], "278770": [[1, 767]], "278801": [[48, 85]], "278802": [[1, 17]], "278803": [[1, 87], [91, 133], [135, 297], [299, 323]], "278804": [[1, 4]], "278805": [[3, 26], [30, 167], [170, 193], [196, 280], [283, 284], [288, 288]], "278808": [[1, 445], [447, 462], [464, 1793]], "278820": [[17, 1533]], "278822": [[1, 1627]], "278873": [[70, 129]], "278874": [[1, 273], [275, 478]], "278875": [[1, 210], [212, 834]], "278923": [[55, 467]], "278957": [[79, 227]], "278962": [[68, 408]], "278963": [[1, 23], [25, 175]], "278969": [[70, 511], [514, 1051], [1053, 1291], [1293, 1397], [1399, 1460]], "278975": [[1, 475], [477, 745], [747, 850]], "278976": [[1, 20]], "278986": [[71, 199]], "279024": [[82, 382]], "279029": [[1, 260]], "279071": [[71, 244]], "279080": [[68, 224]], "279115": [[118, 524]], "279116": [[38, 485]], "279479": [[86, 190]], "279588": [[100, 1259]], "279653": [[77, 77], [82, 261]], "279654": [[1, 108], [110, 1231], [1285, 1299]], "279656": [[1, 43]], "279658": [[1, 689], [691, 713]], "279667": [[68, 1033]], "279681": [[77, 104]], "279682": [[1, 29], [33, 34], [37, 38]], "279683": [[1, 26]], "279684": [[1, 22]], "279685": [[1, 93], [95, 209]], "279691": [[71, 113]], "279694": [[1, 2235]], "279715": [[71, 474], [476, 477], [480, 480], [511, 511], [523, 691]], 
"279716": [[1, 860], [875, 1528], [1530, 1653]], "279760": [[68, 578], [585, 728]], "279766": [[1, 1689]], "279767": [[1, 776]], "279794": [[77, 1100]], "279823": [[61, 395]], "279841": [[75, 398], [407, 2122]], "279844": [[72, 295]], "279887": [[79, 221], [225, 397]], "279931": [[84, 628], [630, 743], [746, 801], [803, 1043], [1045, 3022]], "279966": [[79, 441]], "279975": [[70, 190], [192, 253], [256, 281], [283, 709], [734, 1121]], "279993": [[85, 156]], "279994": [[1, 47]], "280013": [[1, 25]], "280015": [[1, 39], [41, 56], [59, 554], [560, 580]], "280016": [[1, 149]], "280017": [[1, 608]], "280018": [[1, 1281]], "280020": [[1, 45]], "280024": [[1, 427]], "280187": [[4, 60]], "280188": [[1, 245]], "280191": [[1, 781], [783, 866], [869, 900]], "280194": [[1, 238]], "280242": [[1, 411], [414, 627]], "280249": [[1, 486], [488, 1433]], "280251": [[1, 165], [167, 372]], "280327": [[49, 85]], "280330": [[1, 857]], "280349": [[1, 247], [252, 623], [626, 626]], "280363": [[1, 359]], "280364": [[1, 370], [372, 617], [619, 619], [621, 1090], [1102, 1363]], "280383": [[64, 65]], "280384": [[2, 34]], "280385": [[1, 519], [523, 569], [574, 1187], [1189, 1533], [1536, 2022]], "281613": [[101, 128], [130, 130], [133, 133], [135, 139], [143, 256], [258, 903]], "281639": [[1, 132]], "281641": [[1, 319]], "281693": [[1, 2191]], "281707": [[99, 982], [1000, 1065]], "281726": [[1, 288]], "281727": [[1, 1605]], "281797": [[125, 2176]], "281975": [[1, 215]], "281976": [[1, 2166]], "282033": [[82, 117]], "282034": [[1, 33]], "282035": [[1, 40]], "282037": [[1, 457], [459, 1862]], "282092": [[92, 222], [624, 2276]], "282708": [[1, 8]], "282710": [[1, 2], [8, 8]], "282712": [[1, 1], [10, 68]], "282730": [[89, 164]], "282731": [[1, 172]], "282732": [[1, 69]], "282733": [[1, 177]], "282734": [[1, 327]], "282735": [[1, 642], [645, 1232], [1235, 1823]], "282800": [[1, 377]], "282807": [[1, 326]], "282814": [[1, 1843]], "282842": [[1, 80]], "282917": [[117, 157], [159, 191]], "282918": [[1, 
51]], "282919": [[1, 243]], "282922": [[1, 131]], "282923": [[1, 17], [19, 30], [32, 36], [38, 39], [41, 86], [88, 224]], "283042": [[1, 6]], "283043": [[1, 105], [108, 519]], "283049": [[82, 93]], "283050": [[1, 212]], "283052": [[1, 111]], "283059": [[1, 125], [127, 451]], "283270": [[76, 573], [576, 1502], [1504, 1888], [1890, 1912]], "283283": [[4, 1668], [1670, 1748]], "283305": [[79, 85]], "283306": [[1, 289]], "283307": [[1, 153], [156, 456]], "283308": [[1, 547], [549, 571], [573, 895], [897, 948]], "283353": [[80, 822]], "283358": [[1, 243], [245, 981]], "283359": [[1, 428]], "283407": [[82, 114]], "283408": [[1, 27], [29, 2088], [2098, 2125], [2203, 2416], [2528, 2542]], "283416": [[49, 151], [154, 245]], "283453": [[83, 537]], "283469": [[74, 74]], "283478": [[76, 303], [324, 969]], "283548": [[145, 288]], "283680": [[1, 81]], "283681": [[1, 17]], "283682": [[1, 384]], "283685": [[1, 314]], "283820": [[67, 1548]], "283830": [[1, 722]], "283834": [[1, 67], [69, 82]], "283835": [[1, 14], [16, 112]], "283865": [[1, 1177]], "283876": [[65, 211], [215, 724]], "283877": [[1, 1496]], "283884": [[349, 504], [509, 756]], "283885": [[1, 1723]], "283933": [[88, 232]], "283934": [[1, 784], [793, 870], [875, 1245], [1267, 1291]], "283946": [[85, 1448], [1450, 1462]], "283964": [[1, 388]], "284006": [[73, 390]], "284014": [[1, 266]], "284025": [[110, 157]], "284029": [[1, 112]], "284035": [[1, 360]], "284036": [[1, 140], [143, 348]], "284037": [[1, 340]], "284038": [[1, 55]], "284039": [[1, 30]], "284040": [[1, 33]], "284041": [[1, 44]], "284042": [[1, 129]], "284043": [[1, 205], [210, 224]], "284044": [[1, 30]]}
\ No newline at end of file
diff --git a/bucoffea/data/json/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt b/bucoffea/data/json/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt
deleted file mode 100644
index bc17f4b86..000000000
--- a/bucoffea/data/json/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt
+++ /dev/null
@@ -1 +0,0 @@
-{"297050": [[12, 137], [193, 776]], "297056": [[12, 203]], "297057": [[1, 4], [14, 105], [112, 377], [385, 418], [424, 509], [516, 906]], "297099": [[24, 62]], "297100": [[1, 15], [21, 369], [375, 381]], "297101": [[1, 668], [673, 697], [700, 856], [862, 937], [943, 1101]], "297113": [[1, 204], [211, 252]], "297114": [[1, 99], [106, 161]], "297175": [[1, 85]], "297176": [[11, 120], [125, 214]], "297177": [[1, 162]], "297178": [[1, 54], [59, 334], [342, 749], [754, 967], [972, 1037], [1043, 1264], [1272, 1282], [1290, 1385]], "297215": [[1, 47]], "297218": [[1, 27]], "297219": [[1, 80], [85, 281], [288, 579], [585, 916], [921, 1429], [1436, 2004], [2010, 2638]], "297224": [[10, 19], [24, 138]], "297225": [[1, 32]], "297227": [[9, 192]], "297292": [[1, 125], [130, 131], [136, 667], [675, 753]], "297293": [[1, 121], [127, 150]], "297296": [[1, 236], [240, 401], [406, 418], [425, 497]], "297308": [[1, 44]], "297359": [[39, 70], [164, 180]], "297411": [[32, 737], [740, 800], [807, 950]], "297424": [[32, 149]], "297425": [[1, 107], [112, 157]], "297426": [[1, 28], [34, 84], [90, 111]], "297429": [[1, 72]], "297430": [[1, 199]], "297431": [[1, 49], [55, 64], [71, 188]], "297432": [[1, 112]], "297433": [[1, 159]], "297434": [[1, 161]], "297435": [[1, 94]], "297467": [[50, 138]], "297468": [[1, 74]], "297469": [[1, 4], [9, 70]], "297483": [[37, 68], [71, 201], [206, 214]], "297484": [[1, 47], [53, 208], [214, 214]], "297485": [[1, 16], [23, 253], [258, 299], [302, 314], [321, 420]], "297486": [[1, 74], [79, 598], [603, 625]], "297487": [[1, 433], [439, 491], [495, 603], [609, 613]], "297488": [[1, 73], [80, 424]], "297503": [[5, 275], [282, 559], [566, 606], [612, 635], [642, 772], [777, 779]], "297504": [[1, 41], [125, 136]], "297505": [[1, 394]], "297557": [[8, 28], [67, 113], [119, 167], [173, 174], [180, 394]], "297558": [[9, 266]], "297562": [[1, 69], [120, 369]], "297563": [[1, 254], [260, 264]], "297598": [[17, 17], [22, 33]], "297599": [[1, 169], [211, 225], [230, 
312], [319, 385], [395, 407]], "297603": [[1, 420]], "297604": [[1, 126], [131, 272], [279, 375], [381, 407]], "297605": [[1, 6], [13, 20], [24, 89], [95, 223], [257, 407]], "297606": [[1, 94], [99, 231]], "297620": [[32, 318]], "297656": [[64, 116], [123, 135], [140, 230], [269, 307], [313, 330], [341, 388], [393, 433]], "297665": [[1, 153], [159, 209], [214, 279]], "297666": [[1, 11], [17, 81], [86, 121]], "297670": [[21, 34]], "297674": [[3, 102], [108, 188]], "297675": [[1, 123], [129, 239], [244, 328], [334, 467], [470, 471]], "297722": [[55, 160], [165, 353]], "297723": [[1, 13], [51, 222]], "298996": [[33, 216]], "298997": [[1, 37], [47, 47]], "299000": [[4, 77]], "299042": [[33, 55]], "299061": [[38, 355]], "299062": [[1, 163], [166, 303]], "299064": [[7, 85]], "299065": [[13, 248], [251, 342]], "299067": [[1, 459]], "299096": [[2, 97]], "299149": [[29, 470]], "299178": [[37, 56], [58, 111]], "299180": [[5, 98]], "299184": [[1, 561]], "299185": [[1, 120]], "299327": [[1, 72]], "299329": [[1, 172]], "299368": [[37, 175]], "299369": [[1, 303]], "299370": [[1, 7], [47, 705]], "299380": [[34, 227]], "299381": [[1, 45]], "299394": [[5, 33]], "299395": [[1, 187]], "299396": [[1, 81]], "299420": [[2, 50]], "299443": [[145, 164]], "299450": [[39, 88]], "299477": [[39, 42], [82, 87]], "299478": [[1, 175]], "299479": [[1, 123]], "299480": [[1, 6], [8, 715]], "299481": [[1, 196], [199, 236], [260, 479], [487, 940], [943, 1037], [1061, 1257]], "299593": [[95, 177], [179, 896]], "299594": [[1, 317]], "299595": [[1, 134], [138, 138]], "299597": [[3, 91], [93, 540]], "299649": [[151, 332]], "300087": [[36, 59], [61, 126], [128, 216], [218, 239]], "300105": [[1, 21]], "300106": [[1, 74]], "300107": [[1, 28], [30, 47]], "300117": [[35, 67]], "300122": [[46, 730], [735, 924], [927, 1295]], "300123": [[1, 384], [387, 612]], "300155": [[35, 1229]], "300156": [[1, 72]], "300157": [[9, 1107]], "300226": [[43, 448]], "300233": [[43, 162]], "300234": [[1, 59]], "300235": [[1, 
187]], "300236": [[11, 187]], "300237": [[1, 713], [716, 717]], "300238": [[30, 58], [62, 329]], "300239": [[1, 145], [148, 167], [171, 213]], "300240": [[1, 7], [11, 46], [51, 362]], "300280": [[52, 56], [61, 69], [73, 150], [155, 165], [178, 198], [207, 222], [226, 251], [255, 268], [275, 345], [349, 370], [381, 548], [553, 607], [617, 639], [663, 691]], "300281": [[3, 8]], "300282": [[1, 9], [13, 59], [73, 92], [97, 114], [142, 151], [156, 186]], "300283": [[1, 34]], "300284": [[1, 22], [38, 47], [50, 82], [90, 98], [108, 130], [133, 152], [156, 250], [260, 414], [420, 561], [568, 585], [590, 680], [691, 751]], "300364": [[27, 46]], "300365": [[1, 20]], "300366": [[1, 21]], "300367": [[1, 20]], "300368": [[1, 20]], "300369": [[1, 20]], "300370": [[1, 20]], "300371": [[1, 20]], "300372": [[1, 8]], "300373": [[1, 21]], "300374": [[1, 21]], "300375": [[1, 93]], "300389": [[1, 1], [4, 5], [8, 8], [11, 20], [23, 39], [60, 149]], "300390": [[2, 21]], "300391": [[1, 21]], "300392": [[1, 21]], "300393": [[1, 20]], "300394": [[1, 21]], "300395": [[1, 20]], "300396": [[1, 20]], "300397": [[1, 20]], "300398": [[1, 20]], "300399": [[1, 20]], "300400": [[1, 677]], "300401": [[19, 673]], "300459": [[40, 332]], "300461": [[1, 98]], "300462": [[1, 97]], "300463": [[1, 124]], "300464": [[1, 103], [126, 265]], "300466": [[1, 650]], "300467": [[1, 563]], "300497": [[26, 175]], "300514": [[38, 150]], "300515": [[1, 838], [957, 1013]], "300516": [[1, 111]], "300517": [[1, 8], [103, 623]], "300558": [[8, 548]], "300560": [[1, 640], [645, 844]], "300574": [[15, 111]], "300575": [[1, 82]], "300576": [[7, 123], [125, 1206]], "300631": [[41, 49], [63, 66], [75, 226]], "300632": [[1, 21]], "300633": [[1, 447]], "300635": [[1, 23], [26, 176]], "300636": [[1, 335], [338, 1572]], "300673": [[41, 47], [49, 49], [52, 56], [59, 66]], "300674": [[1, 33]], "300675": [[1, 33]], "300676": [[1, 26]], "300742": [[56, 343]], "300777": [[21, 509]], "300780": [[3, 341]], "300785": [[1, 549], [552, 750], 
[752, 1201], [1219, 1272]], "300806": [[36, 214]], "300811": [[6, 508]], "300812": [[1, 59]], "300816": [[6, 161]], "300817": [[1, 33], [36, 74], [80, 383], [410, 493]], "301046": [[162, 223]], "301141": [[25, 31]], "301142": [[1, 897]], "301161": [[36, 805]], "301165": [[1, 145]], "301179": [[35, 59]], "301180": [[1, 97]], "301183": [[3, 10], [13, 303]], "301281": [[38, 157]], "301283": [[3, 886]], "301298": [[45, 949]], "301323": [[35, 474], [477, 990]], "301330": [[22, 353]], "301359": [[33, 319]], "301384": [[1, 476]], "301391": [[38, 214]], "301392": [[1, 627]], "301393": [[2, 18]], "301396": [[1, 33]], "301397": [[1, 228], [231, 517], [519, 728]], "301398": [[1, 9]], "301399": [[1, 108]], "301417": [[50, 367]], "301447": [[86, 96], [99, 400], [404, 512]], "301448": [[1, 329]], "301449": [[1, 404]], "301450": [[1, 173]], "301461": [[28, 581]], "301472": [[35, 830]], "301475": [[1, 18]], "301476": [[1, 844]], "301519": [[42, 250]], "301524": [[1, 110], [117, 263]], "301529": [[1, 49]], "301530": [[1, 110]], "301531": [[1, 394]], "301532": [[1, 611]], "301567": [[14, 372]], "301627": [[57, 943]], "301664": [[28, 445]], "301665": [[1, 294], [319, 487]], "301694": [[36, 102]], "301912": [[43, 52], [101, 422]], "301913": [[1, 58]], "301914": [[1, 350]], "301941": [[31, 568]], "301959": [[30, 1938]], "301960": [[1, 147]], "301970": [[6, 123]], "301984": [[17, 317]], "301985": [[1, 367]], "301986": [[1, 381]], "301987": [[1, 1128]], "301997": [[37, 407]], "301998": [[1, 1704]], "302019": [[34, 86]], "302026": [[24, 53], [66, 72]], "302029": [[1, 98]], "302031": [[1, 401], [403, 446], [448, 675], [678, 818]], "302033": [[1, 40], [44, 46]], "302034": [[1, 20]], "302037": [[18, 20]], "302038": [[10, 10]], "302040": [[1, 174]], "302041": [[1, 72]], "302042": [[1, 523]], "302043": [[1, 228]], "302131": [[71, 943]], "302159": [[33, 140]], "302163": [[32, 671], [674, 1230]], "302165": [[1, 85]], "302166": [[1, 16]], "302225": [[54, 133], [136, 923]], "302228": [[58, 78], 
[81, 293]], "302229": [[1, 457]], "302240": [[1, 960]], "302262": [[37, 471]], "302263": [[1, 1250]], "302277": [[15, 17], [22, 192], [194, 391]], "302279": [[1, 71]], "302280": [[1, 152]], "302322": [[33, 870]], "302328": [[42, 722]], "302337": [[27, 162]], "302342": [[19, 72]], "302343": [[1, 98]], "302344": [[3, 482]], "302350": [[1, 136]], "302388": [[27, 157], [164, 717]], "302392": [[45, 407]], "302393": [[1, 887]], "302448": [[21, 312], [317, 442], [445, 483], [486, 1926]], "302472": [[28, 808]], "302473": [[1, 368], [398, 406]], "302474": [[1, 305]], "302475": [[1, 7]], "302476": [[1, 259]], "302479": [[30, 222], [225, 340]], "302484": [[8, 176]], "302485": [[1, 922]], "302492": [[10, 21], [23, 59]], "302493": [[1, 7]], "302494": [[1, 618]], "302509": [[73, 92]], "302513": [[37, 89]], "302522": [[29, 46]], "302523": [[1, 59]], "302525": [[1, 677], [747, 778]], "302526": [[1, 582]], "302548": [[40, 124]], "302551": [[1, 7]], "302553": [[1, 188]], "302554": [[1, 7]], "302555": [[1, 11]], "302563": [[40, 46]], "302565": [[1, 7]], "302572": [[6, 291]], "302573": [[1, 693], [730, 1285]], "302596": [[47, 534], [545, 705], [710, 986]], "302597": [[1, 1054]], "302634": [[37, 73], [75, 123], [125, 129], [133, 165], [168, 175], [177, 216], [218, 358], [361, 375], [378, 404], [407, 423], [425, 503], [505, 578], [581, 594], [596, 638]], "302635": [[1, 22], [24, 28], [30, 39], [41, 53], [55, 132], [134, 144], [146, 265], [267, 271], [274, 344], [347, 357], [359, 375], [378, 384], [386, 414], [416, 494], [497, 608], [611, 634], [637, 684], [687, 706], [708, 724], [726, 901], [904, 954], [957, 982], [984, 1072], [1075, 1124], [1126, 1129], [1132, 1206], [1209, 1234], [1236, 1291]], "302651": [[1, 149]], "302654": [[1, 317]], "302661": [[1, 72]], "302663": [[1, 706]], "303825": [[1, 180]], "303832": [[54, 1334], [1338, 1913]], "303838": [[54, 54], [83, 2044]], "303885": [[60, 2052]], "303948": [[55, 1678]], "303998": [[58, 319]], "303999": [[1, 751]], "304000": [[1, 56]], 
"304062": [[54, 2014]], "304119": [[71, 138], [143, 150]], "304120": [[1, 253]], "304125": [[1, 1769]], "304144": [[76, 2596], [2598, 2656]], "304158": [[165, 1750], [1752, 2087]], "304169": [[50, 1714], [1731, 1733]], "304170": [[1, 620]], "304199": [[10, 18]], "304200": [[1, 321]], "304204": [[55, 607]], "304209": [[52, 98], [100, 133], [135, 157], [176, 253], [255, 477]], "304291": [[56, 85]], "304292": [[1, 1125], [1183, 1779], [1781, 1811]], "304333": [[74, 1653]], "304354": [[82, 295]], "304366": [[44, 1387], [1390, 1396], [1399, 1402], [1404, 1407], [1409, 1412], [1414, 1416], [1419, 1421], [1424, 1873]], "304446": [[40, 92], [110, 111]], "304447": [[1, 534], [540, 1644]], "304451": [[1, 60]], "304505": [[60, 86]], "304506": [[1, 370]], "304507": [[1, 239]], "304508": [[1, 1324]], "304562": [[52, 56], [60, 848]], "304616": [[52, 223], [227, 740], [747, 1002]], "304625": [[73, 536]], "304626": [[1, 8]], "304654": [[53, 704]], "304655": [[1, 1194]], "304661": [[53, 67], [69, 143], [147, 173], [175, 198], [237, 240]], "304662": [[1, 150]], "304663": [[1, 689]], "304671": [[51, 1193]], "304672": [[1, 60]], "304737": [[69, 149]], "304738": [[1, 1681]], "304739": [[3, 16]], "304740": [[1, 278]], "304776": [[49, 98]], "304777": [[1, 431], [438, 510]], "304778": [[4, 1300]], "304797": [[28, 87], [91, 306], [308, 377], [385, 1202], [1205, 2950]], "305044": [[3, 203], [302, 306], [309, 310], [313, 313], [318, 330]], "305045": [[1, 873]], "305046": [[1, 667], [671, 686]], "305059": [[63, 518], [520, 575]], "305062": [[1, 8]], "305063": [[1, 35]], "305064": [[1, 2045]], "305081": [[52, 1107]], "305112": [[68, 1527]], "305113": [[9, 72]], "305114": [[1, 526]], "305178": [[69, 124]], "305179": [[1, 21]], "305180": [[1, 9]], "305181": [[1, 8]], "305182": [[1, 8]], "305183": [[1, 231], [262, 266]], "305184": [[1, 8]], "305186": [[1, 112], [120, 422]], "305188": [[1, 1002]], "305202": [[74, 132], [136, 729]], "305204": [[1, 1229]], "305207": [[52, 1077]], "305208": [[1, 
372]], "305234": [[52, 99]], "305236": [[1, 23]], "305237": [[1, 16], [18, 1147]], "305247": [[57, 433]], "305248": [[1, 957]], "305252": [[1, 548]], "305282": [[75, 207]], "305310": [[60, 157], [163, 458]], "305311": [[1, 153]], "305312": [[1, 227]], "305313": [[1, 741]], "305314": [[1, 404]], "305336": [[36, 241]], "305338": [[1, 107]], "305341": [[1, 503]], "305349": [[1, 34]], "305350": [[1, 21]], "305351": [[1, 868]], "305358": [[91, 231], [233, 253]], "305364": [[50, 147]], "305365": [[1, 668], [676, 832]], "305366": [[1, 721], [724, 756], [769, 934], [936, 1254]], "305376": [[71, 168]], "305377": [[9, 1292], [1294, 1383], [1386, 1525]], "305405": [[44, 536], [573, 575]], "305406": [[1, 394], [401, 520], [528, 535], [540, 1475]], "305440": [[20, 291]], "305441": [[1, 121]], "305516": [[46, 518], [558, 639]], "305517": [[1, 163]], "305518": [[1, 1134]], "305586": [[53, 583]], "305589": [[1, 691]], "305590": [[1, 500], [517, 1020]], "305636": [[60, 339], [342, 667], [671, 2390]], "305766": [[55, 902]], "305809": [[56, 197]], "305814": [[85, 689], [692, 978], [980, 1074], [1077, 1912]], "305821": [[59, 830]], "305832": [[87, 266]], "305840": [[1, 1144]], "305842": [[1, 862]], "305862": [[81, 705]], "305898": [[70, 780]], "305902": [[53, 521]], "305967": [[1, 32]], "306029": [[63, 96]], "306030": [[1, 110]], "306036": [[60, 63]], "306037": [[1, 49]], "306038": [[1, 139]], "306041": [[1, 320]], "306042": [[1, 371]], "306048": [[1, 140]], "306049": [[1, 358]], "306051": [[1, 415]], "306091": [[422, 629]], "306092": [[1, 588], [593, 976]], "306095": [[1, 300]], "306121": [[57, 152]], "306122": [[1, 127]], "306125": [[1, 756], [770, 2642], [2667, 3007]], "306126": [[1, 497]], "306134": [[53, 84]], "306135": [[1, 1095]], "306138": [[1, 1298]], "306139": [[1, 1112]], "306153": [[78, 165]], "306154": [[1, 251], [253, 691], [709, 1233]], "306155": [[1, 1440]], "306169": [[1, 745]], "306170": [[1, 22]], "306171": [[1, 503]], "306418": [[1, 33], [35, 75]], "306419": [[1, 
62]], "306420": [[1, 108]], "306422": [[9, 126]], "306423": [[1, 333]], "306432": [[1, 339]], "306454": [[13, 101]], "306455": [[1, 11]], "306456": [[1, 237], [239, 787]], "306457": [[1, 31]], "306458": [[1, 17], [20, 35], [37, 41], [43, 47], [49, 53], [56, 60], [62, 66], [68, 72], [74, 77], [79, 83], [85, 89], [93, 102], [104, 108], [110, 114], [116, 120], [122, 126], [129, 139], [141, 145], [147, 151], [153, 166], [169, 173], [175, 179], [181, 185], [187, 191], [193, 197], [200, 210], [212, 216], [218, 222], [225, 229], [231, 235], [237, 241], [243, 247], [249, 249], [252, 256], [258, 268]], "306459": [[1, 512], [514, 2275]], "306460": [[1, 73]]}
\ No newline at end of file
diff --git a/bucoffea/data/json/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt b/bucoffea/data/json/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt
new file mode 100644
index 000000000..588275627
--- /dev/null
+++ b/bucoffea/data/json/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt
@@ -0,0 +1,615 @@
+{
+  "297050": [[12, 137], [193, 776]],
+  "297056": [[12, 203]],
+  "297057": [[1, 4], [14, 105], [112, 377], [385, 418], [424, 509], [516, 906]],
+  "297099": [[24, 62]],
+  "297100": [[1, 15], [21, 369], [375, 381]],
+  "297101": [[1, 668], [673, 697], [700, 856], [862, 937], [943, 1101]],
+  "297113": [[1, 204], [211, 252]],
+  "297114": [[1, 99], [106, 161]],
+  "297175": [[1, 85]],
+  "297176": [[11, 120], [125, 214]],
+  "297177": [[1, 162]],
+  "297178": [
+    [1, 54],
+    [59, 334],
+    [342, 749],
+    [754, 967],
+    [972, 1037],
+    [1043, 1264],
+    [1272, 1282],
+    [1290, 1385]
+  ],
+  "297179": [[1, 6], [12, 97]],
+  "297215": [[1, 47]],
+  "297218": [[1, 27]],
+  "297219": [
+    [1, 80],
+    [85, 281],
+    [288, 579],
+    [585, 916],
+    [921, 1429],
+    [1436, 2004],
+    [2010, 2638]
+  ],
+  "297224": [[10, 19], [24, 138]],
+  "297225": [[1, 32]],
+  "297227": [[9, 192]],
+  "297292": [[1, 125], [130, 131], [136, 667], [675, 753]],
+  "297293": [[1, 121], [127, 150]],
+  "297296": [[1, 236], [240, 401], [406, 418], [425, 497]],
+  "297308": [[1, 44]],
+  "297359": [[39, 70], [164, 180]],
+  "297411": [[32, 737], [740, 800], [807, 950]],
+  "297424": [[32, 149]],
+  "297425": [[1, 107], [112, 157]],
+  "297426": [[1, 28], [34, 84], [90, 111]],
+  "297429": [[1, 72]],
+  "297430": [[1, 199]],
+  "297431": [[1, 49], [55, 64], [71, 188]],
+  "297432": [[1, 112]],
+  "297433": [[1, 159]],
+  "297434": [[1, 161]],
+  "297435": [[1, 94]],
+  "297467": [[50, 138]],
+  "297468": [[1, 74]],
+  "297469": [[1, 4], [9, 70]],
+  "297483": [[37, 68], [71, 201], [206, 214]],
+  "297484": [[1, 47], [53, 208], [214, 214]],
+  "297485": [[1, 16], [23, 253], [258, 299], [302, 314], [321, 420]],
+  "297486": [[1, 74], [79, 598], [603, 625]],
+  "297487": [[1, 433], [439, 491], [495, 603], [609, 613]],
+  "297488": [[1, 73], [80, 424]],
+  "297503": [
+    [5, 275],
+    [282, 559],
+    [566, 606],
+    [612, 635],
+    [642, 772],
+    [777, 779]
+  ],
+  "297504": [[1, 41], [125, 136]],
+  "297505": [[1, 394]],
+  "297557": [[8, 28], [67, 113], [119, 167], [173, 174], [180, 394]],
+  "297558": [[9, 266]],
+  "297562": [[1, 69], [120, 369]],
+  "297563": [[1, 254], [260, 264]],
+  "297598": [[17, 17], [22, 33]],
+  "297599": [[1, 169], [211, 225], [230, 312], [319, 385], [395, 407]],
+  "297603": [[1, 420]],
+  "297604": [[1, 126], [131, 272], [279, 375], [381, 407]],
+  "297605": [[1, 6], [13, 20], [24, 89], [95, 223], [257, 407]],
+  "297606": [[1, 94], [99, 231]],
+  "297620": [[32, 318]],
+  "297656": [
+    [64, 116],
+    [123, 135],
+    [140, 230],
+    [269, 307],
+    [313, 330],
+    [341, 388],
+    [393, 433]
+  ],
+  "297665": [[1, 153], [159, 209], [214, 279]],
+  "297666": [[1, 11], [17, 81], [86, 121]],
+  "297670": [[21, 34]],
+  "297674": [[3, 102], [108, 188]],
+  "297675": [[1, 123], [129, 239], [244, 328], [334, 467], [470, 471]],
+  "297722": [[55, 160], [165, 353]],
+  "297723": [[1, 13], [51, 222]],
+  "298996": [[33, 216]],
+  "298997": [[1, 37], [47, 47]],
+  "299000": [[4, 77]],
+  "299042": [[33, 55]],
+  "299061": [[38, 355]],
+  "299062": [[1, 163], [166, 303]],
+  "299064": [[7, 85]],
+  "299065": [[13, 248], [251, 342]],
+  "299067": [[1, 459]],
+  "299096": [[2, 97]],
+  "299149": [[29, 470]],
+  "299178": [[37, 56], [58, 111]],
+  "299180": [[5, 98]],
+  "299184": [[1, 561]],
+  "299185": [[1, 120]],
+  "299327": [[1, 72]],
+  "299329": [[1, 172]],
+  "299368": [[37, 175]],
+  "299369": [[1, 303]],
+  "299370": [[1, 7], [47, 705]],
+  "299380": [[34, 227]],
+  "299381": [[1, 45]],
+  "299394": [[5, 33]],
+  "299395": [[1, 187]],
+  "299396": [[1, 81]],
+  "299420": [[2, 50]],
+  "299443": [[145, 164]],
+  "299450": [[39, 88]],
+  "299477": [[39, 42], [82, 87]],
+  "299478": [[1, 175]],
+  "299479": [[1, 123]],
+  "299480": [[1, 6], [8, 715]],
+  "299481": [
+    [1, 196],
+    [199, 236],
+    [260, 479],
+    [487, 940],
+    [943, 1037],
+    [1061, 1257]
+  ],
+  "299593": [[95, 177], [179, 896]],
+  "299594": [[1, 317]],
+  "299595": [[1, 134], [138, 138]],
+  "299597": [[3, 91], [93, 540]],
+  "299649": [[151, 332]],
+  "300087": [[36, 59], [61, 126], [128, 216], [218, 239]],
+  "300105": [[1, 21]],
+  "300106": [[1, 74]],
+  "300107": [[1, 28], [30, 47]],
+  "300117": [[35, 67]],
+  "300122": [[46, 730], [735, 924], [927, 1295]],
+  "300123": [[1, 384], [387, 612]],
+  "300155": [[35, 1229]],
+  "300156": [[1, 72]],
+  "300157": [[9, 1107]],
+  "300226": [[43, 448]],
+  "300233": [[43, 162]],
+  "300234": [[1, 59]],
+  "300235": [[1, 187]],
+  "300236": [[11, 187]],
+  "300237": [[1, 713], [716, 717]],
+  "300238": [[30, 58], [62, 329]],
+  "300239": [[1, 145], [148, 167], [171, 213]],
+  "300240": [[1, 7], [11, 46], [51, 362]],
+  "300280": [
+    [52, 56],
+    [61, 69],
+    [73, 150],
+    [155, 165],
+    [178, 198],
+    [207, 222],
+    [226, 251],
+    [255, 268],
+    [275, 345],
+    [349, 370],
+    [381, 548],
+    [553, 607],
+    [617, 639],
+    [663, 691]
+  ],
+  "300281": [[3, 8]],
+  "300282": [[1, 9], [13, 59], [73, 92], [97, 114], [142, 151], [156, 186]],
+  "300283": [[1, 34]],
+  "300284": [
+    [1, 22],
+    [38, 47],
+    [50, 82],
+    [90, 98],
+    [108, 130],
+    [133, 152],
+    [156, 250],
+    [260, 414],
+    [420, 561],
+    [568, 585],
+    [590, 680],
+    [691, 751]
+  ],
+  "300364": [[27, 46]],
+  "300372": [[1, 8]],
+  "300375": [[1, 93]],
+  "300389": [[1, 1], [4, 5], [8, 8], [11, 20], [23, 39], [60, 149]],
+  "300399": [[1, 20]],
+  "300400": [[1, 677]],
+  "300401": [[19, 673]],
+  "300459": [[40, 332]],
+  "300461": [[1, 98]],
+  "300462": [[1, 97]],
+  "300463": [[1, 124]],
+  "300464": [[1, 103], [126, 265]],
+  "300466": [[1, 650]],
+  "300467": [[1, 563]],
+  "300497": [[26, 175]],
+  "300514": [[38, 150]],
+  "300515": [[1, 838], [957, 1013]],
+  "300516": [[1, 111]],
+  "300517": [[1, 8], [103, 623]],
+  "300558": [[8, 548]],
+  "300560": [[1, 640], [645, 844]],
+  "300574": [[15, 111]],
+  "300575": [[1, 82]],
+  "300576": [[7, 123], [125, 1206]],
+  "300631": [[41, 49], [63, 66], [75, 226]],
+  "300632": [[1, 21]],
+  "300633": [[1, 447]],
+  "300635": [[1, 23], [26, 176]],
+  "300636": [[1, 335], [338, 1572]],
+  "300673": [[41, 47], [49, 49], [52, 56], [59, 66]],
+  "300674": [[1, 33]],
+  "300675": [[1, 33]],
+  "300676": [[1, 26]],
+  "300742": [[56, 343]],
+  "300777": [[21, 509]],
+  "300780": [[3, 341]],
+  "300785": [[1, 549], [552, 750], [752, 1201], [1219, 1272]],
+  "300806": [[36, 214]],
+  "300811": [[6, 508]],
+  "300812": [[1, 59]],
+  "300816": [[6, 161]],
+  "300817": [[1, 33], [36, 74], [80, 383], [410, 493]],
+  "301046": [[162, 223]],
+  "301141": [[25, 31]],
+  "301142": [[1, 897]],
+  "301161": [[36, 805]],
+  "301165": [[1, 145]],
+  "301179": [[35, 59]],
+  "301180": [[1, 97]],
+  "301183": [[3, 10], [13, 303]],
+  "301281": [[38, 157]],
+  "301283": [[3, 886]],
+  "301298": [[45, 949]],
+  "301323": [[35, 474], [477, 990]],
+  "301330": [[22, 353]],
+  "301359": [[33, 319]],
+  "301384": [[1, 476]],
+  "301391": [[38, 214]],
+  "301392": [[1, 627]],
+  "301393": [[2, 18]],
+  "301396": [[1, 33]],
+  "301397": [[1, 228], [231, 517], [519, 728]],
+  "301398": [[1, 9]],
+  "301399": [[1, 108]],
+  "301417": [[50, 367]],
+  "301447": [[86, 96], [99, 400], [404, 512]],
+  "301448": [[1, 329]],
+  "301449": [[1, 404]],
+  "301450": [[1, 173]],
+  "301461": [[28, 581]],
+  "301472": [[35, 830]],
+  "301475": [[1, 18]],
+  "301476": [[1, 844]],
+  "301519": [[42, 250]],
+  "301524": [[1, 110], [117, 263]],
+  "301529": [[1, 49]],
+  "301530": [[1, 110]],
+  "301531": [[1, 394]],
+  "301532": [[1, 611]],
+  "301567": [[14, 372]],
+  "301627": [[57, 943]],
+  "301664": [[28, 445]],
+  "301665": [[1, 294], [319, 487]],
+  "301694": [[36, 102]],
+  "301912": [[43, 52], [101, 422]],
+  "301913": [[1, 58]],
+  "301914": [[1, 350]],
+  "301941": [[31, 568]],
+  "301959": [[30, 1938]],
+  "301960": [[1, 147]],
+  "301970": [[6, 123]],
+  "301984": [[17, 317]],
+  "301985": [[1, 367]],
+  "301986": [[1, 381]],
+  "301987": [[1, 1128]],
+  "301997": [[37, 407]],
+  "301998": [[1, 1704]],
+  "302019": [[34, 86]],
+  "302026": [[24, 53], [66, 72]],
+  "302029": [[1, 98]],
+  "302031": [[1, 401], [403, 446], [448, 675], [678, 818]],
+  "302033": [[1, 40], [44, 46]],
+  "302034": [[1, 20]],
+  "302037": [[18, 20]],
+  "302038": [[10, 10]],
+  "302040": [[1, 174]],
+  "302041": [[1, 72]],
+  "302042": [[1, 523]],
+  "302043": [[1, 228]],
+  "302131": [[71, 943]],
+  "302159": [[33, 140]],
+  "302163": [[32, 671], [674, 1230]],
+  "302165": [[1, 85]],
+  "302166": [[1, 16]],
+  "302225": [[54, 133], [136, 923]],
+  "302228": [[58, 78], [81, 293]],
+  "302229": [[1, 457]],
+  "302240": [[1, 960]],
+  "302262": [[37, 471]],
+  "302263": [[1, 1250]],
+  "302277": [[15, 17], [22, 192], [194, 391]],
+  "302279": [[1, 71]],
+  "302280": [[1, 152]],
+  "302322": [[33, 870]],
+  "302328": [[42, 722]],
+  "302337": [[27, 162]],
+  "302342": [[19, 72]],
+  "302343": [[1, 98]],
+  "302344": [[3, 482]],
+  "302350": [[1, 136]],
+  "302388": [[27, 157], [164, 717]],
+  "302392": [[45, 407]],
+  "302393": [[1, 887]],
+  "302448": [[21, 312], [317, 442], [445, 483], [486, 1926]],
+  "302472": [[28, 808]],
+  "302473": [[1, 368], [398, 406]],
+  "302474": [[1, 305]],
+  "302475": [[1, 7]],
+  "302476": [[1, 259]],
+  "302479": [[30, 222], [225, 340]],
+  "302484": [[8, 176]],
+  "302485": [[1, 922]],
+  "302492": [[10, 21], [23, 59]],
+  "302493": [[1, 7]],
+  "302494": [[1, 618]],
+  "302509": [[73, 92]],
+  "302513": [[37, 89]],
+  "302522": [[29, 46]],
+  "302523": [[1, 59]],
+  "302525": [[1, 677], [747, 778]],
+  "302526": [[1, 582]],
+  "302548": [[40, 124]],
+  "302551": [[1, 7]],
+  "302553": [[1, 188]],
+  "302554": [[1, 7]],
+  "302555": [[1, 11]],
+  "302563": [[40, 46]],
+  "302565": [[1, 7]],
+  "302572": [[6, 291]],
+  "302573": [[1, 693], [730, 1285]],
+  "302596": [[47, 534], [545, 705], [710, 986]],
+  "302597": [[1, 1054]],
+  "302634": [
+    [37, 73],
+    [75, 123],
+    [125, 129],
+    [133, 165],
+    [168, 175],
+    [177, 216],
+    [218, 358],
+    [361, 375],
+    [378, 404],
+    [407, 423],
+    [425, 503],
+    [505, 578],
+    [581, 594],
+    [596, 638]
+  ],
+  "302635": [
+    [1, 22],
+    [24, 28],
+    [30, 39],
+    [41, 53],
+    [55, 132],
+    [134, 144],
+    [146, 265],
+    [267, 271],
+    [274, 344],
+    [347, 357],
+    [359, 375],
+    [378, 384],
+    [386, 414],
+    [416, 494],
+    [497, 608],
+    [611, 634],
+    [637, 684],
+    [687, 706],
+    [708, 724],
+    [726, 901],
+    [904, 954],
+    [957, 982],
+    [984, 1072],
+    [1075, 1124],
+    [1126, 1129],
+    [1132, 1206],
+    [1209, 1234],
+    [1236, 1291]
+  ],
+  "302651": [[1, 149]],
+  "302654": [[1, 317]],
+  "302661": [[1, 72]],
+  "302663": [[1, 706]],
+  "303825": [[1, 180]],
+  "303832": [[54, 1334], [1338, 1913]],
+  "303838": [[54, 54], [83, 2044]],
+  "303885": [[60, 2052]],
+  "303948": [[55, 1678]],
+  "303998": [[58, 319]],
+  "303999": [[1, 751]],
+  "304000": [[1, 56]],
+  "304062": [[54, 2014]],
+  "304119": [[71, 138], [143, 150]],
+  "304120": [[1, 253]],
+  "304125": [[1, 1769]],
+  "304144": [[76, 2596], [2598, 2656]],
+  "304158": [[165, 1750], [1752, 2087]],
+  "304169": [[50, 1714], [1731, 1733]],
+  "304170": [[1, 620]],
+  "304199": [[10, 18]],
+  "304200": [[1, 321]],
+  "304204": [[55, 607]],
+  "304209": [[52, 98], [100, 133], [135, 157], [176, 253], [255, 477]],
+  "304291": [[56, 85]],
+  "304292": [[1, 1125], [1183, 1779], [1781, 1811]],
+  "304333": [[74, 1653]],
+  "304354": [[82, 295]],
+  "304366": [
+    [44, 1387],
+    [1390, 1396],
+    [1399, 1402],
+    [1404, 1407],
+    [1409, 1412],
+    [1414, 1416],
+    [1419, 1421],
+    [1424, 1873]
+  ],
+  "304446": [[40, 92], [110, 111]],
+  "304447": [[1, 534], [540, 1644]],
+  "304451": [[1, 60]],
+  "304505": [[60, 86]],
+  "304506": [[1, 370]],
+  "304507": [[1, 239]],
+  "304508": [[1, 1324]],
+  "304562": [[52, 56], [60, 848]],
+  "304616": [[52, 223], [227, 740], [747, 1002]],
+  "304625": [[73, 536]],
+  "304626": [[1, 8]],
+  "304654": [[53, 704]],
+  "304655": [[1, 1194]],
+  "304661": [[53, 67], [69, 143], [147, 173], [175, 198], [237, 240]],
+  "304662": [[1, 150]],
+  "304663": [[1, 689]],
+  "304671": [[51, 1193]],
+  "304672": [[1, 60]],
+  "304737": [[69, 149]],
+  "304738": [[1, 1681]],
+  "304739": [[3, 16]],
+  "304740": [[1, 278]],
+  "304776": [[49, 98]],
+  "304777": [[1, 431], [438, 510]],
+  "304778": [[4, 1300]],
+  "304797": [[28, 87], [91, 306], [308, 377], [385, 1202], [1205, 2950]],
+  "305044": [[3, 203], [302, 306], [309, 310], [313, 313], [318, 330]],
+  "305045": [[1, 873]],
+  "305046": [[1, 667], [671, 686]],
+  "305059": [[63, 518], [520, 575]],
+  "305062": [[1, 8]],
+  "305063": [[1, 35]],
+  "305064": [[1, 2045]],
+  "305081": [[52, 1107]],
+  "305112": [[68, 1527]],
+  "305113": [[9, 72]],
+  "305114": [[1, 526]],
+  "305178": [[69, 124]],
+  "305179": [[1, 21]],
+  "305180": [[1, 9]],
+  "305181": [[1, 8]],
+  "305182": [[1, 8]],
+  "305183": [[1, 231], [262, 266]],
+  "305184": [[1, 8]],
+  "305186": [[1, 112], [120, 422]],
+  "305188": [[1, 1002]],
+  "305202": [[74, 132], [136, 729]],
+  "305204": [[1, 1229]],
+  "305207": [[52, 1077]],
+  "305208": [[1, 372]],
+  "305234": [[52, 99]],
+  "305236": [[1, 23]],
+  "305237": [[1, 16], [18, 1147]],
+  "305247": [[57, 433]],
+  "305248": [[1, 957]],
+  "305252": [[1, 548]],
+  "305282": [[75, 207]],
+  "305310": [[60, 157], [163, 458]],
+  "305311": [[1, 153]],
+  "305312": [[1, 227]],
+  "305313": [[1, 741]],
+  "305314": [[1, 404]],
+  "305336": [[36, 241]],
+  "305338": [[1, 107]],
+  "305341": [[1, 503]],
+  "305349": [[1, 34]],
+  "305350": [[1, 21]],
+  "305351": [[1, 868]],
+  "305358": [[91, 231], [233, 253]],
+  "305364": [[50, 147]],
+  "305365": [[1, 668], [676, 832]],
+  "305366": [[1, 721], [724, 756], [769, 934], [936, 1254]],
+  "305376": [[71, 168]],
+  "305377": [[9, 1292], [1294, 1383], [1386, 1525]],
+  "305405": [[44, 536], [573, 575]],
+  "305406": [[1, 394], [401, 520], [528, 535], [540, 1475]],
+  "305440": [[20, 291]],
+  "305441": [[1, 121]],
+  "305516": [[46, 518], [558, 639]],
+  "305517": [[1, 163]],
+  "305518": [[1, 1134]],
+  "305586": [[53, 583]],
+  "305589": [[1, 691]],
+  "305590": [[1, 500], [517, 1020]],
+  "305636": [[60, 339], [342, 667], [671, 2390]],
+  "305766": [[55, 902]],
+  "305809": [[56, 197]],
+  "305814": [[85, 689], [692, 978], [980, 1074], [1077, 1912]],
+  "305821": [[59, 830]],
+  "305832": [[87, 266]],
+  "305840": [[1, 1144]],
+  "305842": [[1, 862]],
+  "305862": [[81, 705]],
+  "305898": [[70, 780]],
+  "305902": [[53, 521]],
+  "305967": [[1, 32]],
+  "306029": [[63, 96]],
+  "306030": [[1, 110]],
+  "306036": [[60, 63]],
+  "306037": [[1, 49]],
+  "306038": [[1, 139]],
+  "306041": [[1, 320]],
+  "306042": [[1, 371]],
+  "306048": [[1, 140]],
+  "306049": [[1, 358]],
+  "306051": [[1, 415]],
+  "306091": [[422, 629]],
+  "306092": [[1, 588], [593, 976]],
+  "306095": [[1, 300]],
+  "306121": [[57, 152]],
+  "306122": [[1, 127]],
+  "306125": [[1, 756], [770, 2642], [2667, 3007]],
+  "306126": [[1, 497]],
+  "306134": [[53, 84]],
+  "306135": [[1, 1095]],
+  "306138": [[1, 1298]],
+  "306139": [[1, 1112]],
+  "306153": [[78, 165]],
+  "306154": [[1, 251], [253, 691], [709, 1233]],
+  "306155": [[1, 1440]],
+  "306169": [[1, 745]],
+  "306170": [[1, 22]],
+  "306171": [[1, 503]],
+  "306418": [[1, 33], [35, 75]],
+  "306419": [[1, 62]],
+  "306420": [[1, 108]],
+  "306422": [[9, 126]],
+  "306423": [[1, 333]],
+  "306432": [[1, 339]],
+  "306454": [[13, 101]],
+  "306455": [[1, 11]],
+  "306456": [[1, 237], [239, 787]],
+  "306457": [[1, 31]],
+  "306458": [
+    [1, 17],
+    [20, 35],
+    [37, 41],
+    [43, 47],
+    [49, 53],
+    [56, 60],
+    [62, 66],
+    [68, 72],
+    [74, 77],
+    [79, 83],
+    [85, 89],
+    [93, 102],
+    [104, 108],
+    [110, 114],
+    [116, 120],
+    [122, 126],
+    [129, 139],
+    [141, 145],
+    [147, 151],
+    [153, 166],
+    [169, 173],
+    [175, 179],
+    [181, 185],
+    [187, 191],
+    [193, 197],
+    [200, 210],
+    [212, 216],
+    [218, 222],
+    [225, 229],
+    [231, 235],
+    [237, 241],
+    [243, 247],
+    [249, 249],
+    [252, 256],
+    [258, 268]
+  ],
+  "306459": [[1, 512], [514, 2275]],
+  "306460": [[1, 73]]
+}
\ No newline at end of file
diff --git a/bucoffea/data/json/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt b/bucoffea/data/json/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt
deleted file mode 100644
index e2c924ad1..000000000
--- a/bucoffea/data/json/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt
+++ /dev/null
@@ -1 +0,0 @@
-{"315257": [[1, 88], [91, 92]], "315259": [[1, 172]], "315264": [[32, 261]], "315265": [[4, 58]], "315267": [[1, 244]], "315270": [[1, 633]], "315322": [[23, 118], [122, 1354]], "315339": [[37, 654]], "315357": [[44, 732], [736, 770], [780, 831]], "315361": [[40, 619]], "315363": [[1, 35], [37, 47], [49, 67], [69, 80], [82, 90]], "315366": [[10, 61], [67, 750]], "315420": [[28, 920], [924, 942], [954, 1748]], "315488": [[42, 843]], "315489": [[1, 653], [672, 709]], "315490": [[1, 24]], "315506": [[13, 100]], "315510": [[1, 345]], "315512": [[1, 1122]], "315543": [[55, 171]], "315555": [[22, 97]], "315556": [[1, 26]], "315557": [[1, 279]], "315640": [[46, 87]], "315641": [[1, 4]], "315642": [[1, 92]], "315644": [[1, 184]], "315645": [[1, 40], [47, 390], [395, 565], [567, 594]], "315646": [[1, 1033]], "315647": [[1, 58]], "315648": [[1, 110]], "315689": [[24, 1127], [1180, 1186]], "315690": [[10, 654]], "315702": [[38, 113]], "315703": [[1, 545]], "315704": [[1, 61]], "315705": [[1, 700]], "315713": [[35, 359], [374, 385], [400, 1123]], "315721": [[33, 50], [56, 626]], "315741": [[34, 92]], "315764": [[37, 309]], "315770": [[39, 332]], "315784": [[29, 33], [40, 156], [158, 161]], "315785": [[1, 198], [201, 305]], "315786": [[1, 72]], "315790": [[1, 716], [718, 922]], "315800": [[41, 621]], "315801": [[1, 344]], "315840": [[33, 1154]], "315973": [[39, 240], [262, 914]], "315974": [[1, 71]], "316058": [[42, 405]], "316059": [[1, 321], [323, 567]], "316060": [[1, 935]], "316061": [[1, 23], [194, 206]], "316062": [[1, 4]], "316082": [[37, 407]], "316110": [[1, 210]], "316111": [[1, 48]], "316113": [[1, 64]], "316114": [[1, 777], [779, 1562]], "316153": [[1, 770]], "316186": [[38, 81]], "316187": [[1, 1091], [1093, 1100], [1207, 2077]], "316199": [[33, 1197]], "316200": [[1, 10]], "316201": [[1, 498]], "316202": [[1, 403]], "316216": [[25, 466]], "316217": [[1, 264]], "316218": [[1, 1008]], "316219": [[1, 283]], "316239": [[38, 626]], "316240": [[1, 1224]], "316241": 
[[1, 325]], "316271": [[36, 121]], "316361": [[22, 124], [126, 131], [133, 135], [137, 137], [139, 142], [144, 145], [147, 147], [149, 159], [161, 174], [176, 178], [180, 189], [191, 197], [199, 208], [210, 223]], "316362": [[1, 208], [210, 212], [214, 225], [227, 242], [244, 269], [271, 319], [332, 392], [394, 395], [397, 402], [404, 404], [406, 410], [412, 412], [414, 418], [420, 428], [430, 450]], "316363": [[1, 39], [41, 49]], "316377": [[19, 19], [21, 40]], "316378": [[1, 29]], "316379": [[1, 70]], "316380": [[1, 708], [714, 1213]], "316455": [[36, 71]], "316457": [[1, 1454]], "316469": [[17, 444]], "316470": [[1, 476]], "316472": [[1, 70], [76, 333]], "316505": [[44, 205], [207, 921], [923, 1364]], "316569": [[20, 703], [742, 1945]], "316590": [[17, 526]], "316613": [[49, 241]], "316615": [[1, 338]], "316666": [[1, 981]], "316667": [[1, 197]], "316700": [[46, 346], [388, 397]], "316701": [[1, 479]], "316702": [[1, 388]], "316715": [[33, 45]], "316716": [[1, 181]], "316717": [[1, 192]], "316718": [[1, 311]], "316719": [[1, 91], [100, 144]], "316720": [[1, 182]], "316721": [[1, 15]], "316722": [[1, 751]], "316723": [[1, 64]], "316758": [[11, 1609]], "316766": [[51, 1920], [1922, 2199]], "316876": [[34, 38], [40, 644]], "316877": [[1, 164], [171, 401]], "316879": [[1, 156]], "316928": [[40, 188]], "316985": [[33, 503]], "316993": [[44, 254]], "316994": [[1, 14]], "316995": [[1, 623]], "317080": [[41, 66]], "317087": [[43, 177], [213, 222], [257, 852]], "317089": [[1, 1003]], "317182": [[47, 63], [65, 1424]], "317212": [[36, 175]], "317213": [[1, 375]], "317279": [[43, 508]], "317291": [[34, 824]], "317292": [[1, 330]], "317297": [[1, 283], [347, 760]], "317319": [[44, 182]], "317320": [[1, 326], [333, 411], [413, 1827]], "317338": [[66, 107]], "317339": [[1, 163]], "317340": [[1, 418]], "317382": [[58, 128]], "317383": [[1, 58]], "317391": [[39, 46]], "317392": [[1, 1116], [1119, 1900]], "317435": [[1, 1397]], "317438": [[1, 68], [71, 309]], "317475": [[33, 89], 
[105, 115]], "317478": [[1, 23]], "317484": [[1, 448], [467, 514], [519, 545]], "317488": [[1, 844]], "317527": [[41, 1487]], "317591": [[43, 334]], "317626": [[40, 2045]], "317640": [[29, 829]], "317641": [[1, 1390]], "317648": [[45, 139]], "317649": [[1, 621]], "317650": [[1, 1304]], "317661": [[35, 1256]], "317663": [[1, 858]], "317683": [[83, 402]], "317696": [[38, 682]], "318733": [[1, 33]], "318828": [[54, 123]], "318872": [[16, 287]], "318874": [[1, 320]], "318876": [[1, 161]], "318877": [[1, 615]], "319077": [[52, 92]], "319337": [[48, 2240]], "319347": [[40, 690]], "319348": [[1, 37]], "319349": [[1, 148]], "319449": [[35, 559], [562, 734]], "319450": [[1, 287], [290, 683]], "319456": [[138, 346]], "319459": [[1, 78]], "319486": [[38, 103]], "319503": [[1, 317]], "319524": [[36, 1459]], "319526": [[1, 282]], "319528": [[1, 259]], "319579": [[41, 3168]], "319625": [[17, 206]], "319639": [[31, 1509]], "319656": [[51, 310]], "319657": [[1, 167]], "319658": [[1, 225]], "319659": [[1, 87]], "319678": [[36, 294]], "319687": [[46, 90]], "319697": [[47, 482], [490, 490]], "319698": [[1, 312]], "319756": [[44, 1966]], "319840": [[41, 388]], "319841": [[1, 167]], "319847": [[49, 51]], "319848": [[1, 53]], "319849": [[1, 492]], "319851": [[1, 4]], "319853": [[1, 40], [47, 262]], "319854": [[1, 225]], "319908": [[1, 40], [43, 53]], "319909": [[1, 7]], "319910": [[1, 983]], "319912": [[1, 59]], "319913": [[1, 56]], "319914": [[1, 32]], "319915": [[1, 416]], "319941": [[43, 298]], "319942": [[1, 50]], "319950": [[38, 205]], "319991": [[46, 882]], "319992": [[1, 264]], "319993": [[1, 955]], "320002": [[52, 192]], "320006": [[1, 34], [36, 341]], "320010": [[1, 330]], "320011": [[1, 302]], "320012": [[1, 99]], "320023": [[17, 292]], "320024": [[1, 410]], "320025": [[1, 113]], "320026": [[1, 204]], "320038": [[43, 663]], "320039": [[1, 30]], "320040": [[1, 737]], "320059": [[1, 105]], "320060": [[1, 42]], "320061": [[1, 49]], "320062": [[1, 21]], "320063": [[1, 64]], 
"320064": [[1, 200]], "320065": [[1, 920]], "320673": [[35, 901]], "320674": [[1, 599]], "320688": [[49, 531]], "320712": [[39, 242]], "320757": [[51, 382]], "320804": [[46, 1274]], "320807": [[1, 7]], "320809": [[1, 716]], "320821": [[41, 221]], "320822": [[1, 523]], "320823": [[1, 360]], "320824": [[1, 1051]], "320838": [[93, 357]], "320840": [[1, 471]], "320841": [[1, 205]], "320853": [[41, 369]], "320854": [[1, 125]], "320855": [[1, 565]], "320856": [[1, 159]], "320857": [[1, 272]], "320858": [[1, 230]], "320859": [[1, 40]], "320887": [[49, 321]], "320888": [[1, 26]], "320916": [[2, 25]], "320917": [[1, 1926]], "320920": [[1, 178]], "320933": [[40, 214]], "320934": [[1, 831]], "320936": [[1, 407]], "320941": [[1, 93]], "320980": [[44, 142]], "320995": [[26, 214]], "320996": [[1, 380]], "321004": [[39, 188]], "321005": [[1, 61]], "321006": [[1, 162]], "321007": [[1, 831]], "321009": [[1, 85]], "321010": [[1, 342]], "321011": [[1, 213]], "321012": [[1, 35], [190, 201]], "321051": [[58, 1179]], "321055": [[1, 302], [304, 326], [328, 340], [368, 759]], "321067": [[39, 225], [232, 639]], "321068": [[1, 715]], "321069": [[1, 313]], "321119": [[45, 214]], "321121": [[1, 47]], "321122": [[1, 395]], "321124": [[1, 819]], "321126": [[1, 493]], "321134": [[33, 70]], "321138": [[1, 741]], "321140": [[1, 798]], "321149": [[35, 1424], [1426, 1476], [1478, 1553], [1558, 1576], [1578, 1588], [1591, 1743]], "321165": [[1, 8]], "321166": [[1, 10]], "321167": [[1, 141], [143, 143], [145, 510], [512, 552], [554, 691], [693, 923]], "321177": [[38, 74], [77, 214], [216, 232], [234, 247], [249, 321], [323, 365], [367, 455]], "321178": [[5, 78]], "321218": [[49, 962]], "321219": [[1, 934]], "321221": [[1, 40]], "321230": [[41, 124]], "321231": [[1, 59]], "321232": [[1, 30]], "321233": [[1, 727]], "321262": [[1, 4]], "321283": [[48, 357]], "321294": [[1, 62]], "321295": [[1, 307], [309, 316], [318, 384], [390, 394], [396, 604], [606, 616], [619, 646], [649, 690], [693, 754]], "321296": 
[[1, 24], [34, 41], [44, 67]], "321305": [[20, 2600], [2605, 2651]], "321311": [[1, 10]], "321312": [[1, 768]], "321313": [[1, 408]], "321393": [[1, 127], [134, 148]], "321396": [[1, 1475]], "321397": [[1, 365]], "321414": [[31, 1283]], "321415": [[1, 804]], "321431": [[30, 189]], "321432": [[1, 47]], "321433": [[1, 125]], "321434": [[1, 642]], "321436": [[1, 710]], "321457": [[43, 451], [453, 1888]], "321461": [[1, 149]], "321475": [[50, 518], [526, 2084]], "321710": [[1, 57]], "321712": [[1, 2], [16, 54], [57, 115], [117, 263]], "321730": [[2, 257], [259, 291]], "321732": [[1, 127], [129, 181], [185, 189], [192, 245], [248, 252], [254, 373], [375, 381], [386, 386], [389, 392], [395, 424], [426, 432], [434, 448], [450, 452], [454, 459], [467, 586], [589, 680], [682, 686], [689, 903], [905, 973], [975, 1448]], "321735": [[1, 146]], "321755": [[33, 361], [363, 470], [472, 473], [475, 487], [489, 729]], "321758": [[1, 47], [49, 75], [77, 121], [128, 130], [146, 148], [151, 155], [161, 165], [168, 189]], "321760": [[1, 171], [175, 205], [207, 238], [240, 258], [260, 420], [422, 520], [526, 586], [588, 593], [598, 602], [604, 607], [613, 716], [719, 721], [727, 788], [794, 818], [822, 824], [828, 830], [834, 836], [840, 841], [845, 855]], "321773": [[11, 14], [25, 35], [39, 52], [54, 79]], "321774": [[1, 12], [14, 52], [54, 119]], "321775": [[1, 12], [14, 14]], "321776": [[1, 12], [15, 19], [30, 45]], "321777": [[1, 81], [83, 169], [174, 176], [192, 207]], "321778": [[8, 150]], "321780": [[1, 332], [336, 338], [342, 346], [351, 357], [359, 360], [362, 371], [374, 383], [392, 412], [414, 420], [422, 493], [496, 499], [502, 503], [505, 508], [517, 518]], "321781": [[6, 37], [53, 56], [58, 66], [69, 69], [77, 180], [186, 209], [212, 265], [269, 274], [276, 290], [293, 312], [316, 410], [412, 427]], "321813": [[32, 352]], "321815": [[1, 23]], "321817": [[1, 536]], "321818": [[1, 690]], "321820": [[1, 214]], "321831": [[25, 781]], "321832": [[1, 389], [403, 510]], "321833": 
[[1, 407]], "321834": [[1, 333]], "321879": [[39, 47], [50, 52], [55, 68], [71, 73], [77, 89], [93, 95], [99, 111], [114, 116], [120, 132], [136, 138], [141, 154], [157, 159], [163, 175], [178, 181], [185, 197], [200, 202], [207, 218], [222, 356]], "321880": [[1, 41], [44, 132]], "321887": [[54, 948]], "321908": [[43, 472]], "321909": [[1, 208], [210, 1654]], "321917": [[4, 156], [164, 808]], "321919": [[1, 6]], "321933": [[43, 232], [235, 326]], "321960": [[18, 47]], "321961": [[1, 354]], "321973": [[37, 746], [748, 968], [972, 1253]], "321975": [[1, 866]], "321988": [[45, 996], [1106, 1486]], "321990": [[1, 471]], "322013": [[14, 22]], "322014": [[1, 17]], "322022": [[42, 185], [201, 1805]], "322040": [[32, 70]], "322057": [[38, 58]], "322068": [[51, 724]], "322079": [[39, 200], [216, 393], [409, 428]], "322106": [[48, 871]], "322113": [[48, 159]], "322118": [[1, 516], [530, 874]], "322179": [[43, 820], [823, 1783]], "322201": [[39, 266]], "322204": [[1, 280], [282, 301], [303, 331], [337, 1143]], "322222": [[1, 526]], "322252": [[42, 1586]], "322317": [[48, 101]], "322319": [[1, 163]], "322322": [[1, 170], [267, 1205]], "322324": [[1, 416]], "322332": [[37, 1055]], "322348": [[40, 1505]], "322355": [[36, 137]], "322356": [[1, 779]], "322381": [[45, 577]], "322407": [[46, 582]], "322430": [[46, 501]], "322431": [[59, 1166]], "322480": [[60, 408]], "322492": [[1, 1386]], "322510": [[37, 45]], "322599": [[43, 294]], "322602": [[1, 69], [72, 72]], "322603": [[1, 10]], "322605": [[1, 280]], "322617": [[1, 601]], "322625": [[41, 484], [492, 1167]], "322633": [[1, 249]], "323414": [[1, 46]], "323423": [[1, 136]], "323470": [[38, 172], [176, 218], [223, 266]], "323471": [[1, 238]], "323472": [[1, 64]], "323473": [[1, 227]], "323474": [[1, 355]], "323475": [[1, 77]], "323487": [[42, 177], [184, 498]], "323488": [[1, 514], [555, 734], [738, 793]], "323492": [[1, 33]], "323493": [[1, 144]], "323495": [[1, 187]], "323524": [[25, 561]], "323525": [[1, 91], [97, 1126]], 
"323526": [[1, 248], [253, 466]], "323693": [[38, 151]], "323696": [[1, 257]], "323702": [[1, 808]], "323725": [[18, 346]], "323726": [[1, 60]], "323727": [[1, 83], [88, 677], [682, 813], [819, 822], [826, 987]], "323755": [[27, 815], [818, 823], [826, 826], [828, 830], [833, 861], [864, 964]], "323775": [[38, 81], [84, 171]], "323778": [[1, 934]], "323790": [[45, 948]], "323794": [[1, 68]], "323841": [[46, 510]], "323857": [[1, 357]], "323940": [[49, 1567]], "323954": [[1, 77]], "323976": [[31, 85]], "323978": [[1, 73]], "323980": [[1, 202]], "323983": [[1, 188]], "323997": [[1, 498]], "324021": [[44, 819]], "324022": [[1, 554]], "324077": [[54, 710], [712, 753]], "324201": [[20, 834], [837, 1385]], "324202": [[1, 240]], "324205": [[1, 163]], "324206": [[1, 149]], "324207": [[1, 34]], "324209": [[1, 142]], "324237": [[33, 236]], "324245": [[23, 1681]], "324293": [[39, 1440], [1442, 2176], [2178, 2342]], "324315": [[1, 200], [203, 204]], "324318": [[1, 332]], "324420": [[1, 625]], "324729": [[1, 193]], "324747": [[63, 1139]], "324764": [[1, 150]], "324765": [[1, 481]], "324769": [[1, 328]], "324772": [[1, 165]], "324785": [[77, 664]], "324791": [[1, 1217]], "324835": [[40, 230], [302, 369]], "324840": [[1, 96]], "324841": [[1, 1347]], "324846": [[1, 151], [154, 517]], "324878": [[62, 111], [113, 175], [180, 1800]], "324897": [[30, 170]], "324970": [[1, 425], [428, 598], [606, 632], [634, 1529], [1532, 2195]], "324980": [[39, 917], [919, 954], [956, 968], [1005, 1042], [1044, 2340]], "324997": [[29, 150]], "324998": [[1, 368]], "324999": [[1, 14]], "325000": [[1, 371]], "325001": [[1, 105], [108, 171], [173, 595]], "325022": [[45, 1594]], "325057": [[42, 383]], "325097": [[40, 96]], "325098": [[1, 8]], "325099": [[1, 394]], "325100": [[1, 254]], "325101": [[1, 462], [464, 485]], "325110": [[1, 21]], "325117": [[1, 533]], "325159": [[48, 266]], "325168": [[1, 21]], "325169": [[1, 23]], "325170": [[1, 692], [694, 1205]], "325172": [[1, 267], [269, 485]]}
diff --git a/bucoffea/data/json/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt b/bucoffea/data/json/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt
new file mode 100644
index 000000000..e8f503a61
--- /dev/null
+++ b/bucoffea/data/json/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt
@@ -0,0 +1,648 @@
+{
+  "315257": [[1, 88], [91, 92]],
+  "315259": [[1, 172]],
+  "315264": [[32, 261]],
+  "315265": [[4, 58]],
+  "315267": [[1, 244]],
+  "315270": [[1, 633]],
+  "315322": [[23, 118], [122, 1354]],
+  "315339": [[37, 654]],
+  "315357": [[44, 732], [736, 770], [780, 831]],
+  "315361": [[40, 619]],
+  "315363": [[1, 35], [37, 47], [49, 67], [69, 80], [82, 90]],
+  "315366": [[10, 61], [67, 750]],
+  "315420": [[28, 920], [924, 942], [954, 1748]],
+  "315488": [[42, 843]],
+  "315489": [[1, 653], [672, 709]],
+  "315490": [[1, 24]],
+  "315506": [[13, 100]],
+  "315510": [[1, 345]],
+  "315512": [[1, 1122]],
+  "315543": [[55, 171]],
+  "315555": [[22, 97]],
+  "315556": [[1, 26]],
+  "315557": [[1, 279]],
+  "315640": [[46, 87]],
+  "315641": [[1, 4]],
+  "315642": [[1, 92]],
+  "315644": [[1, 184]],
+  "315645": [[1, 40], [47, 390], [395, 565], [567, 594]],
+  "315646": [[1, 1033]],
+  "315647": [[1, 58]],
+  "315648": [[1, 110]],
+  "315689": [[24, 1127], [1180, 1186]],
+  "315690": [[10, 654]],
+  "315702": [[38, 113]],
+  "315703": [[1, 545]],
+  "315704": [[1, 61]],
+  "315705": [[1, 160],[162,162],[164,700]],
+  "315713": [[35, 359], [374, 385], [400, 1123]],
+  "315721": [[33, 50], [56, 626]],
+  "315741": [[34, 92]],
+  "315764": [[37, 309]],
+  "315770": [[39, 332]],
+  "315784": [[29, 33], [40, 156], [158, 161]],
+  "315785": [[1, 198], [201, 305]],
+  "315786": [[1, 72]],
+  "315790": [[1, 716], [718, 922]],
+  "315800": [[41, 621]],
+  "315801": [[1, 344]],
+  "315840": [[33, 1154]],
+  "315973": [[39, 240], [262, 914]],
+  "315974": [[1, 71]],
+  "316058": [[42, 405]],
+  "316059": [[1, 321], [323, 567]],
+  "316060": [[1, 935]],
+  "316061": [[1, 23], [194, 206]],
+  "316062": [[1, 4]],
+  "316082": [[37, 407]],
+  "316110": [[1, 210]],
+  "316111": [[1, 48]],
+  "316113": [[1, 64]],
+  "316114": [[1, 777], [779, 1562]],
+  "316153": [[1, 770]],
+  "316186": [[38, 81]],
+  "316187": [[1, 1091], [1093, 1100], [1207, 2077]],
+  "316199": [[33, 1197]],
+  "316200": [[1, 10]],
+  "316201": [[1, 498]],
+  "316202": [[1, 403]],
+  "316216": [[25, 466]],
+  "316217": [[1, 264]],
+  "316218": [[1, 1008]],
+  "316219": [[1, 283]],
+  "316239": [[38, 626]],
+  "316240": [[1, 1224]],
+  "316241": [[1, 325]],
+  "316271": [[36, 121]],
+  "316361": [
+    [22, 124],
+    [126, 131],
+    [133, 135],
+    [137, 137],
+    [139, 142],
+    [144, 145],
+    [147, 147],
+    [149, 159],
+    [161, 174],
+    [176, 178],
+    [180, 189],
+    [191, 197],
+    [199, 208],
+    [210, 223]
+  ],
+  "316362": [
+    [1, 208],
+    [210, 212],
+    [214, 225],
+    [227, 242],
+    [244, 269],
+    [271, 319],
+    [332, 392],
+    [394, 395],
+    [397, 402],
+    [404, 404],
+    [406, 410],
+    [412, 412],
+    [414, 418],
+    [420, 428],
+    [430, 450]
+  ],
+  "316363": [[1, 39], [41, 49]],
+  "316377": [[19, 19], [21, 40]],
+  "316378": [[1, 29]],
+  "316379": [[1, 70]],
+  "316380": [[1, 708], [714, 1213]],
+  "316455": [[36, 71]],
+  "316457": [[1, 1454]],
+  "316469": [[17, 444]],
+  "316470": [[1, 476]],
+  "316472": [[1, 70], [76, 333]],
+  "316505": [[44, 205], [207, 921], [923, 1364]],
+  "316569": [[20, 703], [742, 1945]],
+  "316590": [[17, 526]],
+  "316613": [[49, 241]],
+  "316615": [[1, 338]],
+  "316666": [[1, 981]],
+  "316667": [[1, 197]],
+  "316700": [[46, 346], [388, 397]],
+  "316701": [[1, 479]],
+  "316702": [[1, 388]],
+  "316715": [[33, 45]],
+  "316716": [[1, 181]],
+  "316717": [[1, 192]],
+  "316718": [[1, 311]],
+  "316719": [[1, 91], [100, 144]],
+  "316720": [[1, 182]],
+  "316721": [[1, 15]],
+  "316722": [[1, 751]],
+  "316723": [[1, 64]],
+  "316758": [[11, 1609]],
+  "316766": [[51, 1920], [1922, 2199]],
+  "316876": [[34, 38], [40, 644]],
+  "316877": [[1, 164], [171, 401]],
+  "316879": [[1, 156]],
+  "316928": [[40, 188]],
+  "316985": [[33, 503]],
+  "316993": [[44, 254]],
+  "316994": [[1, 14]],
+  "316995": [[1, 623]],
+  "317080": [[41, 66]],
+  "317087": [[43, 177], [213, 222], [257, 852]],
+  "317089": [[1, 1003]],
+  "317182": [[47, 63], [65, 1424]],
+  "317212": [[36, 175]],
+  "317213": [[1, 375]],
+  "317279": [[43, 508]],
+  "317291": [[34, 824]],
+  "317292": [[1, 330]],
+  "317297": [[1, 283], [347, 760]],
+  "317319": [[44, 182]],
+  "317320": [[1, 326], [333, 411], [413, 1827]],
+  "317338": [[66, 107]],
+  "317339": [[1, 163]],
+  "317340": [[1, 418]],
+  "317382": [[58, 128]],
+  "317383": [[1, 58]],
+  "317391": [[39, 46]],
+  "317392": [[1, 1116], [1119, 1900]],
+  "317435": [[1, 1397]],
+  "317438": [[1, 68], [71, 309]],
+  "317475": [[33, 89], [105, 115]],
+  "317478": [[1, 23]],
+  "317484": [[1, 448], [467, 514], [519, 545]],
+  "317488": [[1, 844]],
+  "317527": [[41, 1487]],
+  "317591": [[43, 334]],
+  "317626": [[40, 2045]],
+  "317640": [[29, 829]],
+  "317641": [[1, 1390]],
+  "317648": [[45, 139]],
+  "317649": [[1, 621]],
+  "317650": [[1, 1304]],
+  "317661": [[35, 1256]],
+  "317663": [[1, 858]],
+  "317683": [[83, 402]],
+  "317696": [[38, 682]],
+  "318733": [[1, 33]],
+  "318828": [[54, 123]],
+  "318872": [[16, 287]],
+  "318874": [[1, 320]],
+  "318876": [[1, 161]],
+  "318877": [[1, 615]],
+  "319077": [[52, 92]],
+  "319337": [[48, 2240]],
+  "319347": [[40, 690]],
+  "319348": [[1, 37]],
+  "319349": [[1, 148]],
+  "319449": [[35, 559], [562, 734]],
+  "319450": [[1, 287], [290, 683]],
+  "319456": [[138, 346]],
+  "319459": [[1, 78]],
+  "319486": [[38, 103]],
+  "319503": [[1, 317]],
+  "319524": [[36, 1459]],
+  "319526": [[1, 282]],
+  "319528": [[1, 259]],
+  "319579": [[41, 3168]],
+  "319625": [[17, 206]],
+  "319639": [[31, 1509]],
+  "319656": [[51, 310]],
+  "319657": [[1, 167]],
+  "319658": [[1, 225]],
+  "319659": [[1, 87]],
+  "319678": [[36, 294]],
+  "319687": [[46, 90]],
+  "319697": [[47, 482], [490, 490]],
+  "319698": [[1, 312]],
+  "319756": [[44, 1966]],
+  "319840": [[41, 388]],
+  "319841": [[1, 167]],
+  "319847": [[49, 51]],
+  "319848": [[1, 53]],
+  "319849": [[1, 492]],
+  "319851": [[1, 4]],
+  "319853": [[1, 40], [47, 262]],
+  "319854": [[1, 225]],
+  "319908": [[1, 40], [43, 53]],
+  "319909": [[1, 7]],
+  "319910": [[1, 983]],
+  "319912": [[1, 59]],
+  "319913": [[1, 56]],
+  "319914": [[1, 32]],
+  "319915": [[1, 416]],
+  "319941": [[43, 298]],
+  "319942": [[1, 50]],
+  "319950": [[38, 205]],
+  "319991": [[46, 882]],
+  "319992": [[1, 264]],
+  "319993": [[1, 955]],
+  "320002": [[52, 192]],
+  "320006": [[1, 34], [36, 341]],
+  "320010": [[1, 330]],
+  "320011": [[1, 302]],
+  "320012": [[1, 99]],
+  "320023": [[17, 292]],
+  "320024": [[1, 410]],
+  "320025": [[1, 113]],
+  "320026": [[1, 204]],
+  "320038": [[43, 663]],
+  "320039": [[1, 30]],
+  "320040": [[1, 737]],
+  "320059": [[1, 105]],
+  "320060": [[1, 42]],
+  "320061": [[1, 49]],
+  "320062": [[1, 21]],
+  "320063": [[1, 64]],
+  "320064": [[1, 200]],
+  "320065": [[1, 920]],
+  "320673": [[35, 901]],
+  "320674": [[1, 599]],
+  "320688": [[49, 531]],
+  "320712": [[39, 242]],
+  "320757": [[51, 382]],
+  "320804": [[46, 1274]],
+  "320807": [[1, 7]],
+  "320809": [[1, 716]],
+  "320821": [[41, 221]],
+  "320822": [[1, 523]],
+  "320823": [[1, 360]],
+  "320824": [[1, 1051]],
+  "320838": [[93, 357]],
+  "320840": [[1, 471]],
+  "320841": [[1, 205]],
+  "320853": [[41, 369]],
+  "320854": [[1, 125]],
+  "320855": [[1, 565]],
+  "320856": [[1, 159]],
+  "320857": [[1, 272]],
+  "320858": [[1, 230]],
+  "320859": [[1, 40]],
+  "320887": [[49, 321]],
+  "320888": [[1, 26]],
+  "320916": [[2, 25]],
+  "320917": [[1, 1926]],
+  "320920": [[1, 178]],
+  "320933": [[40, 214]],
+  "320934": [[1, 831]],
+  "320936": [[1, 407]],
+  "320941": [[1, 93]],
+  "320980": [[44, 142]],
+  "320995": [[26, 214]],
+  "320996": [[1, 380]],
+  "321004": [[39, 188]],
+  "321005": [[1, 61]],
+  "321006": [[1, 162]],
+  "321007": [[1, 831]],
+  "321009": [[1, 85]],
+  "321010": [[1, 342]],
+  "321011": [[1, 213]],
+  "321012": [[1, 35], [190, 201]],
+  "321051": [[58, 1179]],
+  "321055": [[1, 302], [304, 326], [328, 340], [368, 759]],
+  "321067": [[39, 225], [232, 639]],
+  "321068": [[1, 715]],
+  "321069": [[1, 313]],
+  "321119": [[45, 214]],
+  "321121": [[1, 47]],
+  "321122": [[1, 395]],
+  "321124": [[1, 819]],
+  "321126": [[1, 493]],
+  "321134": [[33, 70]],
+  "321138": [[1, 741]],
+  "321140": [[1, 798]],
+  "321149": [
+    [35, 86],
+    [88, 1424],
+    [1426, 1475],
+    [1478, 1553],
+    [1558, 1575],
+    [1578, 1588],
+    [1591, 1743]
+  ],
+  "321165": [[1, 8]],
+  "321166": [[1, 10]],
+  "321167": [
+    [1, 141],
+    [143, 143],
+    [145, 510],
+    [512, 552],
+    [554, 691],
+    [693, 923]
+  ],
+  "321177": [
+    [38, 74],
+    [77, 214],
+    [216, 232],
+    [234, 247],
+    [249, 321],
+    [323, 365],
+    [367, 455]
+  ],
+  "321178": [[5, 78]],
+  "321218": [[49, 962]],
+  "321219": [[1, 934]],
+  "321221": [[1, 40]],
+  "321230": [[41, 124]],
+  "321231": [[1, 59]],
+  "321232": [[1, 30]],
+  "321233": [[1, 727]],
+  "321262": [[1, 4]],
+  "321283": [[48, 357]],
+  "321294": [[1, 62]],
+  "321295": [
+    [1, 307],
+    [309, 316],
+    [318, 384],
+    [390, 394],
+    [396, 604],
+    [606, 616],
+    [619, 646],
+    [649, 690],
+    [693, 754]
+  ],
+  "321296": [[1, 24], [34, 41], [44, 67]],
+  "321305": [[20, 2600], [2605, 2651]],
+  "321311": [[1, 10]],
+  "321312": [[1, 768]],
+  "321313": [[1, 408]],
+  "321393": [[1, 127], [134, 148]],
+  "321396": [[1, 1475]],
+  "321397": [[1, 365]],
+  "321414": [[31, 1283]],
+  "321415": [[1, 804]],
+  "321431": [[30, 189]],
+  "321432": [[1, 47]],
+  "321433": [[1, 125]],
+  "321434": [[1, 642]],
+  "321436": [[1, 710]],
+  "321457": [[43, 451], [453, 1888]],
+  "321461": [[1, 149]],
+  "321475": [[50, 518], [526, 2084]],
+  "321710": [[1, 57]],
+  "321712": [[1, 2], [16, 54], [57, 115], [117, 263]],
+  "321730": [[2, 257], [259, 291]],
+  "321732": [
+    [1, 127],
+    [129, 181],
+    [185, 189],
+    [192, 245],
+    [248, 252],
+    [254, 373],
+    [375, 381],
+    [386, 386],
+    [389, 392],
+    [395, 424],
+    [426, 432],
+    [434, 448],
+    [450, 452],
+    [454, 459],
+    [467, 586],
+    [589, 680],
+    [682, 686],
+    [689, 903],
+    [905, 973],
+    [975, 1448]
+  ],
+  "321735": [[1, 146]],
+  "321755": [[33, 361], [363, 470], [472, 473], [475, 487], [489, 729]],
+  "321758": [
+    [1, 47],
+    [49, 75],
+    [77, 121],
+    [128, 130],
+    [146, 148],
+    [151, 155],
+    [161, 165],
+    [168, 189]
+  ],
+  "321760": [
+    [1, 171],
+    [175, 205],
+    [207, 238],
+    [240, 258],
+    [260, 420],
+    [422, 520],
+    [526, 586],
+    [588, 593],
+    [598, 602],
+    [604, 607],
+    [613, 716],
+    [719, 721],
+    [727, 788],
+    [794, 818],
+    [822, 824],
+    [828, 830],
+    [834, 836],
+    [840, 841],
+    [845, 855]
+  ],
+  "321773": [[11, 14], [25, 35], [39, 52], [54, 79]],
+  "321774": [[1, 12], [14, 52], [54, 119]],
+  "321775": [[1, 12], [14, 14]],
+  "321776": [[1, 12], [15, 19], [30, 45]],
+  "321777": [[1, 81], [83, 169], [174, 176], [192, 207]],
+  "321778": [[8, 150]],
+  "321780": [
+    [1, 332],
+    [336, 338],
+    [342, 346],
+    [351, 357],
+    [359, 360],
+    [362, 371],
+    [374, 383],
+    [392, 412],
+    [414, 420],
+    [422, 493],
+    [496, 499],
+    [502, 503],
+    [505, 508],
+    [517, 518]
+  ],
+  "321781": [
+    [6, 37],
+    [53, 56],
+    [58, 66],
+    [69, 69],
+    [77, 180],
+    [186, 209],
+    [212, 265],
+    [269, 274],
+    [276, 290],
+    [293, 312],
+    [316, 410],
+    [412, 427]
+  ],
+  "321813": [[32, 352]],
+  "321815": [[1, 23]],
+  "321817": [[1, 536]],
+  "321818": [[1, 690]],
+  "321820": [[1, 214]],
+  "321831": [[25, 781]],
+  "321832": [[1, 389], [403, 510]],
+  "321833": [[1, 407]],
+  "321834": [[1, 333]],
+  "321879": [
+    [39, 47],
+    [50, 52],
+    [55, 68],
+    [71, 73],
+    [77, 89],
+    [93, 95],
+    [99, 111],
+    [114, 116],
+    [120, 132],
+    [136, 138],
+    [141, 154],
+    [157, 159],
+    [163, 175],
+    [178, 181],
+    [185, 197],
+    [200, 202],
+    [207, 218],
+    [222, 356]
+  ],
+  "321880": [[1, 41], [44, 132]],
+  "321887": [[54, 948]],
+  "321908": [[43, 472]],
+  "321909": [[1, 208], [210, 1654]],
+  "321917": [[4, 156], [164, 808]],
+  "321919": [[1, 6]],
+  "321933": [[43, 232], [235, 326]],
+  "321960": [[18, 47]],
+  "321961": [[1, 354]],
+  "321973": [[37, 746], [748, 968], [972, 1253]],
+  "321975": [[1, 866]],
+  "321988": [[45, 996], [1106, 1486]],
+  "321990": [[1, 471]],
+  "322013": [[14, 22]],
+  "322014": [[1, 17]],
+  "322022": [[42, 185], [201, 1805]],
+  "322040": [[32, 70]],
+  "322057": [[38, 58]],
+  "322068": [[51, 724]],
+  "322079": [[39, 200], [216, 393], [409, 428]],
+  "322106": [[48, 871]],
+  "322113": [[48, 159]],
+  "322118": [[1, 516], [530, 874]],
+  "322179": [[43, 820], [823, 1783]],
+  "322201": [[39, 266]],
+  "322204": [[1, 280], [282, 301], [303, 331], [337, 1143]],
+  "322222": [[1, 526]],
+  "322252": [[42, 1586]],
+  "322317": [[48, 101]],
+  "322319": [[1, 163]],
+  "322322": [[1, 170], [267, 1205]],
+  "322324": [[1, 416]],
+  "322332": [[37, 1055]],
+  "322348": [[40, 1505]],
+  "322355": [[36, 137]],
+  "322356": [[1, 779]],
+  "322381": [[45, 577]],
+  "322407": [[46, 582]],
+  "322430": [[46, 794]],
+  "322431": [[1, 53], [59, 1166]],
+  "322480": [[60, 408]],
+  "322492": [[1, 1386]],
+  "322510": [[37, 45]],
+  "322599": [[43, 294]],
+  "322602": [[1, 69], [72, 72]],
+  "322603": [[1, 10]],
+  "322605": [[1, 280]],
+  "322617": [[1, 601]],
+  "322625": [[41, 484], [492, 1167]],
+  "322633": [[1, 249]],
+  "323414": [[1, 46]],
+  "323423": [[1, 136]],
+  "323470": [[38, 172], [176, 218], [223, 266]],
+  "323471": [[1, 238]],
+  "323472": [[1, 64]],
+  "323473": [[1, 227]],
+  "323474": [[1, 355]],
+  "323475": [[1, 77]],
+  "323487": [[42, 177], [184, 498]],
+  "323488": [[1, 514], [555, 734], [738, 793]],
+  "323492": [[1, 33]],
+  "323493": [[1, 144]],
+  "323495": [[1, 187]],
+  "323524": [[25, 561]],
+  "323525": [[1, 91], [97, 1126]],
+  "323526": [[1, 248], [253, 466]],
+  "323693": [[38, 151]],
+  "323696": [[1, 257]],
+  "323702": [[1, 808]],
+  "323725": [[18, 346]],
+  "323726": [[1, 60]],
+  "323727": [[1, 83], [88, 677], [682, 813], [819, 822], [826, 987]],
+  "323755": [
+    [27, 815],
+    [818, 823],
+    [826, 826],
+    [828, 830],
+    [833, 861],
+    [864, 964]
+  ],
+  "323775": [[38, 81], [84, 171]],
+  "323778": [[1, 934]],
+  "323790": [[45, 948]],
+  "323794": [[1, 68]],
+  "323841": [[46, 510]],
+  "323857": [[1, 357]],
+  "323940": [[49, 1567]],
+  "323954": [[1, 77]],
+  "323976": [[31, 85]],
+  "323978": [[1, 73]],
+  "323980": [[1, 202]],
+  "323983": [[1, 188]],
+  "323997": [[1, 498]],
+  "324021": [[44, 819]],
+  "324022": [[1, 554]],
+  "324077": [[54, 710], [712, 753]],
+  "324201": [[20, 834], [837, 1385]],
+  "324202": [[1, 240]],
+  "324205": [[1, 163]],
+  "324206": [[1, 149]],
+  "324207": [[1, 34]],
+  "324209": [[1, 142]],
+  "324237": [[33, 236]],
+  "324245": [[23, 1681]],
+  "324293": [[39, 1440], [1442, 2176], [2178, 2342]],
+  "324315": [[1, 200], [203, 204]],
+  "324318": [[1, 332]],
+  "324420": [[1, 625]],
+  "324729": [[1, 193]],
+  "324747": [[63, 1139]],
+  "324764": [[1, 150]],
+  "324765": [[1, 481]],
+  "324769": [[1, 328]],
+  "324772": [[1, 165]],
+  "324785": [[77, 664]],
+  "324791": [[1, 1217]],
+  "324835": [[40, 230], [302, 369]],
+  "324840": [[1, 96]],
+  "324841": [[1, 1347]],
+  "324846": [[1, 151], [154, 517]],
+  "324878": [[62, 111], [113, 175], [180, 1800]],
+  "324897": [[30, 170]],
+  "324970": [[1, 425], [428, 598], [606, 632], [634, 1529], [1532, 2195]],
+  "324980": [[39, 917], [919, 954], [956, 968], [1005, 1042], [1044, 2340]],
+  "324997": [[29, 150]],
+  "324998": [[1, 368]],
+  "324999": [[1, 14]],
+  "325000": [[1, 371]],
+  "325001": [[1, 105], [108, 171], [173, 595]],
+  "325022": [[45, 1594]],
+  "325057": [[42, 383]],
+  "325097": [[40, 96]],
+  "325098": [[1, 8]],
+  "325099": [[1, 394]],
+  "325100": [[1, 254]],
+  "325101": [[1, 462], [464, 485]],
+  "325110": [[1, 21]],
+  "325117": [[1, 533]],
+  "325159": [[48, 266]],
+  "325168": [[1, 21]],
+  "325169": [[1, 23]],
+  "325170": [[1, 692], [694, 1205]],
+  "325172": [[1, 267], [269, 485]]
+}

From 970502b518dff0cdcf05f1dad95353730928ce1b Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Sat, 18 Mar 2023 14:31:39 +0100
Subject: [PATCH 10/43] add printing

---
 bucoffea/execute/buexec | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/bucoffea/execute/buexec b/bucoffea/execute/buexec
index 13f334446..2bc5c7245 100755
--- a/bucoffea/execute/buexec
+++ b/bucoffea/execute/buexec
@@ -155,10 +155,13 @@ def do_submit(args):
         print("WARNING: --no-prefetch is deprecated. Prefetching is disabled by default. Use --prefetch  if you want to turn it back on")
 
     if args.datasrc == 'das':
+        print("Loading files from das")
         dataset_files = files_from_das(regex=args.dataset)
     elif args.datasrc == 'ac':
+        print("Loading files from ac")
         dataset_files = files_from_ac(regex=args.dataset)
     else:
+        print("Loading files from eos")
         dataset_files = files_from_eos(regex=args.dataset)
 
     # Test mode: One file per data set
@@ -204,6 +207,8 @@ def do_submit(args):
     input_files = []
     if args.send_pack:
         gridpack_path = pjoin(subdir, 'gridpack.tgz')
+        if os.path.exists(gridpack_path) and args.force:
+            os.remove(gridpack_path)
         pack_repo(gridpack_path)
         input_files.append(gridpack_path)
 

From a3bdfd0b8b8dbfed542aacb1fec94205118de18a Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 17:11:12 +0100
Subject: [PATCH 11/43] update xsec

---
 bucoffea/data/datasets/xs/xs_UL.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/bucoffea/data/datasets/xs/xs_UL.yml b/bucoffea/data/datasets/xs/xs_UL.yml
index fd79cbf34..dd939a961 100644
--- a/bucoffea/data/datasets/xs/xs_UL.yml
+++ b/bucoffea/data/datasets/xs/xs_UL.yml
@@ -126,6 +126,8 @@ DYJetsToLL_M-50_HT-800to1200-MLM_2017:
   gen: 0.567
 DYJetsToLL_M-50_HT-800to1200-MLM_2018:
   gen: 0.5668
+DYJetsToLL_LHEFilterPtZ-0To50_MatchEWPDG20-amcatnloFXFX_2018:
+  gen: 99.06
 DYJetsToLL_LHEFilterPtZ-100To250_MatchEWPDG20-amcatnloFXFX_2017:
   gen: 99.06
 DYJetsToLL_LHEFilterPtZ-250To400_MatchEWPDG20-amcatnloFXFX_2017:
@@ -136,6 +138,16 @@ DYJetsToLL_LHEFilterPtZ-50To100_MatchEWPDG20-amcatnloFXFX_2017:
   gen: 401.2
 DYJetsToLL_LHEFilterPtZ-650ToInf_MatchEWPDG20-amcatnloFXFX_2017:
   gen: 0.04721
+DYJetsToLL_LHEFilterPtZ-100To250_MatchEWPDG20-amcatnloFXFX_2018:
+  gen: 99.06
+DYJetsToLL_LHEFilterPtZ-250To400_MatchEWPDG20-amcatnloFXFX_2018:
+  gen: 3.637
+DYJetsToLL_LHEFilterPtZ-400To650_MatchEWPDG20-amcatnloFXFX_2018:
+  gen: 0.5107
+DYJetsToLL_LHEFilterPtZ-50To100_MatchEWPDG20-amcatnloFXFX_2018:
+  gen: 401.2
+DYJetsToLL_LHEFilterPtZ-650ToInf_MatchEWPDG20-amcatnloFXFX_2018:
+  gen: 0.04721
 DYJetsToLL_Pt-100To250_MatchEWPDG20-amcatnloFXFX_2017:
   gen: 94.2
 DYJetsToLL_Pt-100To250_MatchEWPDG20-amcatnloFXFX_2018:
@@ -306,6 +318,8 @@ GJets_DR-0p4_HT-400To600-MLM_2018:
   gen: 126.2
 GJets_DR-0p4_HT-40To100-MLM_2016:
   gen: 17520.0
+GJets_DR-0p4_HT-40To100-MLM_2018:
+  gen: 17520.0
 GJets_DR-0p4_HT-600ToInf-MLM_2016:
   gen: 44.54
 GJets_DR-0p4_HT-600ToInf-MLM_2017:

From 1cc1f3b18bb79d03ac43b197057866d205abcfbb Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 17:30:17 +0100
Subject: [PATCH 12/43] Add new plots for PN score

---
 bucoffea/plot/plotter.py                             | 11 ++++++++---
 bucoffea/plot/studies/stack_plot/distributions.py    | 12 ++++++++----
 .../plot/studies/stack_plot/get_hf_noise_estimate.py |  3 +++
 bucoffea/plot/studies/stack_plot/plot_data_mc.py     |  4 ++++
 bucoffea/vbfhinv/definitions.py                      |  2 ++
 5 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index 621fd1aa1..f1b88f961 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -35,7 +35,8 @@
 recoil_bins_2016 = [ 250,  280,  310,  340,  370,  400,  430,  470,  510, 550,  590,  640,  690,  740,  790,  840,  900,  960, 1020, 1090, 1160, 1250, 1400]
 
 binnings = {
-    'mjj': Bin('mjj', r'$M_{jj} \ (GeV)$', [200., 400., 600., 900., 1200., 1500., 2000., 2750., 3500.]),
+    'mjj': Bin('mjj', r'$M_{jj} \ (GeV)$', [50, 100., 200., 400., 600., 900., 1200., 1500., 2000., 2750., 3500.]),
+    'particlenet_score': Bin('score', r'DNN score', 50, 0, 1),
     'cnn_score': Bin('score', r'CNN score', 25, 0, 1),
     'ak4_pt0': Bin('jetpt',r'Leading AK4 jet $p_{T}$ (GeV)',list(range(80,600,20)) + list(range(600,1000,20)) ),
     'ak4_pt1': Bin('jetpt',r'Trailing AK4 jet $p_{T}$ (GeV)',list(range(40,600,20)) + list(range(600,1000,20)) ),
@@ -88,6 +89,7 @@
     'dphitkpf' : (1e0,1e9),
     'met' : (1e-3,1e5),
     'ak4_mult' : (1e-1,1e8),
+    'particlenet_score' : (1e-1,1e4),
 }
 
 legend_labels = {
@@ -158,7 +160,7 @@
     '.*HF (N|n)oise.*' : (174, 126, 230),
 }
 
-def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distribution='mjj', plot_signal=True, mcscale=1, binwnorm=None, fformats=['pdf'], qcd_file=None, jes_file=None, ulxs=True):
+def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distribution='mjj', plot_signal=True, mcscale=1, binwnorm=None, fformats=['pdf'], qcd_file=None, jes_file=None, ulxs=True, is_blind=False):
     """
     Main plotter function to create a stack plot of data to background estimation (from MC).
     """
@@ -191,6 +193,8 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     # This sorting messes up in SR for some reason
     if data_region != 'sr_vbf':
         h.axis('dataset').sorting = 'integral'
+    if distribution== 'particlenet_score':
+        h = h.integrate('score_type', 'VBF-like')
 
     h_data = h.integrate('region', data_region)
     h_mc = h.integrate('region', mc_region)
@@ -238,7 +242,8 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     fig, ax, rax = fig_ratio()
     
     # Plot data
-    hist.plot1d(h_data[data], ax=ax, overflow=overflow, overlay='dataset', binwnorm=binwnorm, error_opts=data_err_opts)
+    if not ('sr_vbf' in data_region and is_blind):
+        hist.plot1d(h_data[data], ax=ax, overflow=overflow, overlay='dataset', binwnorm=binwnorm, error_opts=data_err_opts)
 
     xedges = h_data.integrate('dataset').axes()[0].edges(overflow=overflow)
 
diff --git a/bucoffea/plot/studies/stack_plot/distributions.py b/bucoffea/plot/studies/stack_plot/distributions.py
index df1b2f835..a13fb298a 100755
--- a/bucoffea/plot/studies/stack_plot/distributions.py
+++ b/bucoffea/plot/studies/stack_plot/distributions.py
@@ -11,19 +11,21 @@
     'dphijj',
     #'cnn_score',
     #'dnn_score',
+    'particlenet_score',
     'recoil',
     'ak4_eta0',
     'ak4_eta1',
     'ak4_pt0',
     'ak4_pt1',
-    #'ak4_central_eta',
-    #'ak4_forward_eta',
-    #'dphijr',
+    # 'ak4_central_eta',
+    # 'ak4_forward_eta',
+    # 'dphijr',
 ]
 
 # Distributions to plot for each region
 distributions = {
     'sr_vbf' : common_distributions + ['ak4_nef0', 'ak4_nef1', 'ak4_nhf0', 'ak4_nhf1', 'ak4_chf0', 'ak4_chf1'],
+    'sr_vbf_nodijetcut' : common_distributions + ['ak4_nef0', 'ak4_nef1', 'ak4_nhf0', 'ak4_nhf1', 'ak4_chf0', 'ak4_chf1'],
     'cr_1m_vbf' : common_distributions + ['muon_pt', 'muon_eta', 'muon_phi', 'muon_mt'],
     'cr_1e_vbf' : common_distributions + ['electron_pt', 'electron_eta', 'electron_phi', 'electron_mt'],
     'cr_2m_vbf' : common_distributions + ['muon_pt0', 'muon_eta0', 'muon_phi0', 'muon_pt1', 'muon_eta1', 'muon_phi1', 'dimuon_mass'],
@@ -34,7 +36,8 @@
 recoil_bins_2016 = [ 250,  280,  310,  340,  370,  400,  430,  470,  510, 550,  590,  640,  690,  740,  790,  840,  900,  960, 1020, 1090, 1160, 1250, 1400]
 
 binnings = {
-    'mjj': Bin('mjj', r'$M_{jj} \ (GeV)$', [200., 400., 600., 900., 1200., 1500., 2000., 2750., 3500., 5000.]),
+    'particlenet_score': Bin('score', r'DNN score', 50, 0, 1),
+    'mjj': Bin('mjj', r'$M_{jj} \ (GeV)$', [50, 100., 200., 400., 600., 900., 1200., 1500., 2000., 2750., 3500., 5000.]),
     'ak4_pt0': Bin('jetpt',r'Leading AK4 jet $p_{T}$ (GeV)',list(range(80,600,20)) + list(range(600,1000,20)) ),
     'ak4_pt1': Bin('jetpt',r'Trailing AK4 jet $p_{T}$ (GeV)',list(range(40,600,20)) + list(range(600,1000,20)) ),
     'ak4_phi0' : Bin("jetphi", r"Leading AK4 jet $\phi$", 50,-np.pi, np.pi),
@@ -74,4 +77,5 @@
     'dphitkpf' : (1e0,1e9),
     'met' : (1e-3,1e5),
     'ak4_mult' : (1e-1,1e8),
+    'particlenet_score' : (1e-1,1e8),
 }
diff --git a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
index cf2bc5d56..50db4594d 100755
--- a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
+++ b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
@@ -69,6 +69,9 @@ def get_hf_noise_estimate(acc, outtag, outrootfile, distribution, years=[2017, 2
 
     # Get data and MC yields in the QCD CR
     h = h.integrate('region', region_name)
+    if distribution== 'particlenet_score':
+        h = h.integrate('score_type', 'VBF-like')
+
     for year in years:
         # Regular expressions matching data and MC
         data = f'MET_{year}'
diff --git a/bucoffea/plot/studies/stack_plot/plot_data_mc.py b/bucoffea/plot/studies/stack_plot/plot_data_mc.py
index 6b3881a54..7bd8797c3 100755
--- a/bucoffea/plot/studies/stack_plot/plot_data_mc.py
+++ b/bucoffea/plot/studies/stack_plot/plot_data_mc.py
@@ -27,6 +27,7 @@ def make_plot(args):
     for year in args.years:
         data = {
             'sr_vbf' : f'MET_{year}',
+            'sr_vbf_nodijetcut' : f'MET_{year}',
             'cr_1m_vbf' : f'MET_{year}',
             'cr_2m_vbf' : f'MET_{year}',
             'cr_1e_vbf' : f'EGamma_{year}',
@@ -36,6 +37,7 @@ def make_plot(args):
 
         mc = {
             'sr_vbf_no_veto_all' : re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*).*{year}'),
+            'sr_vbf_nodijetcut' : re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*).*{year}'),
             'cr_1m_vbf' : re.compile(f'(EWKW.*|EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt_FXFX.*|WJetsToLNu_Pt-FXFX.*).*{year}'),
             'cr_1e_vbf' : re.compile(f'(EWKW.*|EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt_FXFX.*|WJetsToLNu_Pt-FXFX.*).*{year}'),
             'cr_2m_vbf' : re.compile(f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX).*{year}'),
@@ -84,6 +86,7 @@ def make_plot(args):
                         ulxs=not args.eoyxs,
                         fformats=args.fformats,
                         binwnorm=1 if distribution == 'mjj' else None,
+                        is_blind=args.blind,
                     )
                 except KeyError as e:
                     print(str(e))
@@ -97,6 +100,7 @@ def commandline():
     parser.add_argument('--distribution', type=str, default='.*', help='Regex specifying the distributions to plot.')
     parser.add_argument('--years', type=int, nargs='*', default=[2017,2018], help='Years to run on.')
     parser.add_argument('--one_fifth_unblind', action='store_true', help='1/5th unblinded data.')
+    parser.add_argument('--blind', action='store_true', help='blind data.')
     parser.add_argument('--fformats', nargs='*', default=['pdf'], help='Output file format for the plots, default is PDF only.')
     parser.add_argument('--jes', action='store_true', help='Plot JES+JER uncertainty bands.')
     parser.add_argument('--eoyxs', action='store_true', help='Use EOY XS for normalization, otherwise use UL XS.')
diff --git a/bucoffea/vbfhinv/definitions.py b/bucoffea/vbfhinv/definitions.py
index 84b81c628..58b641f33 100644
--- a/bucoffea/vbfhinv/definitions.py
+++ b/bucoffea/vbfhinv/definitions.py
@@ -350,6 +350,8 @@ def add_lists(orig_list, to_add):
         if 'one_fifth_mask' in regions['cr_vbf_qcd']:
             regions['cr_vbf_qcd'].remove('one_fifth_mask')
         regions['cr_vbf_qcd'].append('fail_hf_cuts')
+        if cfg.RUN.REGION_WITHOUT_DIJET_CUTS:
+            regions['cr_vbf_qcd_nodijetcut'] = clean_lists(regions['cr_vbf_qcd'], ['mjj','detajj','dphijj'])
 
     # QCD CR to check with deltaphi(jet,MET) cut inverted
     # Will be used to compare the yields with the QCD template obtained from R&S

From 4ee2a6a40905d40ca0b80b33b783023b0ff4a15f Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 17:38:53 +0100
Subject: [PATCH 13/43] update name convention tree

---
 bucoffea/vbfhinv/vbfhinvProcessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bucoffea/vbfhinv/vbfhinvProcessor.py b/bucoffea/vbfhinv/vbfhinvProcessor.py
index af5cdf432..4070b3531 100644
--- a/bucoffea/vbfhinv/vbfhinvProcessor.py
+++ b/bucoffea/vbfhinv/vbfhinvProcessor.py
@@ -673,7 +673,7 @@ def ewk_correction(a, b):
                     
                     # ParticleNet VBF score
                     if len(scores) > 0:
-                        output['tree_float16'][region]["particleNet_vbfScore"]   +=  processor.column_accumulator(np.float16(scores[:, 0]))
+                        output['tree_float16'][region]["particlenet_score"]   +=  processor.column_accumulator(np.float16(scores[:, 0]))
                     
                     # Dataset labels
                     if re.match("VBF_HToInvisible.*M125.*", df["dataset"]):

From 779efb617a89c66bd3b9eef5b8198b1ca0f59e3b Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 18:04:43 +0100
Subject: [PATCH 14/43] revert additional region stored

---
 bucoffea/config/vbfhinv.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bucoffea/config/vbfhinv.yaml b/bucoffea/config/vbfhinv.yaml
index 29b4febc4..3dfcf614b 100644
--- a/bucoffea/config/vbfhinv.yaml
+++ b/bucoffea/config/vbfhinv.yaml
@@ -241,7 +241,6 @@ default:
       jet_images: False
       tree_regions:
         - sr_vbf_no_veto_all
-        - sr_vbf
     kinematics:
       save: False
       events:

From 8955f61edbb9be98084c58bc327a58544a341b10 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 18:05:08 +0100
Subject: [PATCH 15/43] compact ak4 definition

---
 bucoffea/monojet/definitions.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/bucoffea/monojet/definitions.py b/bucoffea/monojet/definitions.py
index 11fddf6f1..01bb1c2cf 100644
--- a/bucoffea/monojet/definitions.py
+++ b/bucoffea/monojet/definitions.py
@@ -436,17 +436,13 @@ def setup_candidates(df, cfg):
         ptraw=df['Jet_pt']*(1-df['Jet_rawFactor']),
         nconst=df['Jet_nConstituents'],
         hadflav= 0*df['Jet_pt'] if df['is_data'] else df['Jet_hadronFlavour'],
+        setaeta=df['Jet_hfsigmaEtaEta'],
+        sphiphi=df['Jet_hfsigmaPhiPhi'],
+        hfcentralstripsize=df['Jet_hfcentralEtaStripSize'],
+        hfadjacentstripsize=df['Jet_hfadjacentEtaStripsSize'],
+        btagdf=df['Jet_btagDeepFlavQG'],
     )
 
-    kwargs = {
-        'setaeta': df['Jet_hfsigmaEtaEta'],
-        'sphiphi': df['Jet_hfsigmaPhiPhi'],
-        'hfcentralstripsize': df['Jet_hfcentralEtaStripSize'],
-        'hfadjacentstripsize': df['Jet_hfadjacentEtaStripsSize'],
-        'btagdf': df['Jet_btagDeepFlavQG'],
-    }
-    ak4.add_attributes(**kwargs)
-
     if not df['is_data']:
         ak4.add_attributes(jercorr=df['Jet_corr_JER'])
 

From 2ff8aef8999c90a907243d54ad49766d0e58ebc4 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 18:09:45 +0100
Subject: [PATCH 16/43] removed unused variables

---
 bucoffea/vbfhinv/vbfhinvProcessor.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bucoffea/vbfhinv/vbfhinvProcessor.py b/bucoffea/vbfhinv/vbfhinvProcessor.py
index 4070b3531..e653a4073 100644
--- a/bucoffea/vbfhinv/vbfhinvProcessor.py
+++ b/bucoffea/vbfhinv/vbfhinvProcessor.py
@@ -206,7 +206,6 @@ def process(self, df):
         ak4 = ak4[ak4.puid]
 
         # Recalculate MET pt and phi based on npv-corrections
-        met_pt_uncorr, met_phi_uncorr = met_pt, met_phi
         if cfg.MET.XYCORR:
             met_pt, met_phi = met_xy_correction(df, met_pt, met_phi)
 
@@ -243,7 +242,6 @@ def process(self, df):
         df['is_tight_photon'] = photons.mediumId & photons.barrel
 
         # Recoil
-        df['recoil_pt_uncorr'], df['recoil_phi_uncorr'] = recoil(met_pt_uncorr, met_phi_uncorr, electrons, muons, photons)
         df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons)
         df['CaloRecoil_pt'], df['CaloRecoil_phi'] = recoil(df["CaloMET_pt"],df["CaloMET_phi"], electrons, muons, photons)
 

From 242e0163fb9d11572fbfd9c12503a06298324761 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 18:11:03 +0100
Subject: [PATCH 17/43] rename func

---
 bucoffea/vbfhinv/definitions.py | 54 ++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/bucoffea/vbfhinv/definitions.py b/bucoffea/vbfhinv/definitions.py
index 58b641f33..d2010e40f 100644
--- a/bucoffea/vbfhinv/definitions.py
+++ b/bucoffea/vbfhinv/definitions.py
@@ -274,10 +274,10 @@ def vbfhinv_accumulator(cfg):
 
 def vbfhinv_regions(cfg):
     # 'inclusive'    # 'veto_b',
-    def clean_lists(orig_list, to_remove):
+    def remove_items(orig_list, to_remove):
         return list(filter(lambda x: x not in to_remove, orig_list))
     
-    def add_lists(orig_list, to_add):
+    def append_items(orig_list, to_add):
         return list(orig_list+ to_add)
     
     common_cuts = [
@@ -333,7 +333,7 @@ def add_lists(orig_list, to_add):
         regions['sr_vbf'].remove('eemitigation')
 
     if cfg.RUN.REGION_WITHOUT_DIJET_CUTS:
-        regions['sr_vbf_nodijetcut'] = clean_lists(regions['sr_vbf'], ['mjj','detajj','dphijj'])
+        regions['sr_vbf_nodijetcut'] = remove_items(regions['sr_vbf'], ['mjj','detajj','dphijj'])
 
     # SR without PU weights
     # regions['sr_vbf_no_pu'] = copy.deepcopy(regions['sr_vbf'])
@@ -341,88 +341,88 @@ def add_lists(orig_list, to_add):
 
     # SR without HEM veto
     if cfg.RUN.HEMCHECK:
-        regions['sr_vbf_no_hem_veto'] = clean_lists(regions['sr_vbf'], ['metphihemextveto'])
+        regions['sr_vbf_no_hem_veto'] = remove_items(regions['sr_vbf'], ['metphihemextveto'])
 
     # QCD CR with the HF shape cuts inverted
     if cfg.RUN.QCD_ESTIMATION:
         to_remove = ['central_stripsize_cut', 'sigma_eta_minus_phi']
-        regions['cr_vbf_qcd'] = clean_lists(regions['sr_vbf'], to_remove)
+        regions['cr_vbf_qcd'] = remove_items(regions['sr_vbf'], to_remove)
         if 'one_fifth_mask' in regions['cr_vbf_qcd']:
             regions['cr_vbf_qcd'].remove('one_fifth_mask')
         regions['cr_vbf_qcd'].append('fail_hf_cuts')
         if cfg.RUN.REGION_WITHOUT_DIJET_CUTS:
-            regions['cr_vbf_qcd_nodijetcut'] = clean_lists(regions['cr_vbf_qcd'], ['mjj','detajj','dphijj'])
+            regions['cr_vbf_qcd_nodijetcut'] = remove_items(regions['cr_vbf_qcd'], ['mjj','detajj','dphijj'])
 
     # QCD CR to check with deltaphi(jet,MET) cut inverted
     # Will be used to compare the yields with the QCD template obtained from R&S
     if cfg.RUN.REBSMEAR_CHECK:
-        regions['cr_vbf_qcd_rs'] = clean_lists(regions['sr_vbf'], ['mindphijr'])
-        regions['cr_vbf_qcd_rs'] = add_lists(regions['cr_vbf_qcd_rs'], ['mindphijr_inv'])
+        regions['cr_vbf_qcd_rs'] = remove_items(regions['sr_vbf'], ['mindphijr'])
+        regions['cr_vbf_qcd_rs'] = append_items(regions['cr_vbf_qcd_rs'], ['mindphijr_inv'])
     
     # Dimuon CR
     to_add = ['trig_met', 'at_least_one_tight_mu', 'two_muons', 'dimuon_charge', 'dimuon_mass', 'dpfcalo_cr']
     to_remove = ['veto_muo']
-    regions['cr_2m_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
+    regions['cr_2m_vbf'] = remove_items( append_items(to_add, common_cuts), to_remove)
     
     # Single muon CR
     to_add = ['trig_met', 'at_least_one_tight_mu', 'one_muon', 'dpfcalo_cr']
     to_remove = ['veto_muo']
-    regions['cr_1m_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
+    regions['cr_1m_vbf'] = remove_items( append_items(to_add, common_cuts), to_remove)
 
     # Dielectron CR
     to_add = ['trig_ele', 'at_least_one_tight_el', 'two_electrons', 'dielectron_charge', 'dielectron_mass', 'dpfcalo_cr']
     to_remove = ['veto_ele']
-    regions['cr_2e_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
+    regions['cr_2e_vbf'] = remove_items( append_items(to_add, common_cuts), to_remove)
     
     # Single electron CR
     to_add = ['trig_ele', 'at_least_one_tight_el', 'one_electron', 'met_el', 'no_el_in_hem', 'dpfcalo_cr']
     to_remove = ['veto_ele']
-    regions['cr_1e_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
+    regions['cr_1e_vbf'] = remove_items( append_items(to_add, common_cuts), to_remove)
 
     # Photon CR
     to_add = ['trig_photon', 'at_least_one_tight_photon', 'one_photon', 'photon_pt', 'dpfcalo_cr']
     to_remove = ['veto_photon']
-    regions['cr_g_vbf'] = clean_lists( add_lists(to_add, common_cuts), to_remove)
+    regions['cr_g_vbf'] = remove_items( append_items(to_add, common_cuts), to_remove)
 
     # Z CRs with CaloMETNoLep cut
     if cfg.RUN.CALOMET_CHECK:
         for r in ['cr_2e_vbf', 'cr_2m_vbf']:
-            regions[f'{r}_calocut'] = add_lists(regions[r], 'calo_metptnolep')
+            regions[f'{r}_calocut'] = append_items(regions[r], 'calo_metptnolep')
 
     # VBF signal region where the hard-lepton vetoes are replace
     # with lepton veto weights
     to_add = ['met_sr', 'mindphijm']
     to_remove = ['veto_muo', 'veto_tau', 'veto_ele', 'mindphijr', 'recoil']
-    regions.update(dict([(f"{region}_no_veto_all", add_lists(clean_lists(regions[region], to_remove),to_add)) for region in regions.keys() if region.startswith("sr_")]))
+    regions.update(dict([(f"{region}_no_veto_all", append_items(remove_items(regions[region], to_remove),to_add)) for region in regions.keys() if region.startswith("sr_")]))
 
     # Region with high detajj cut
     if "sr_vbf_detajj_gt_3p0" in cfg.RUN.EXTRA_REGIONS:
-        regions['sr_vbf_detajj_gt_3p0'] = add_lists(regions['sr_vbf_no_veto_all'], ['detajj_gt_3p0'])
+        regions['sr_vbf_detajj_gt_3p0'] = append_items(regions['sr_vbf_no_veto_all'], ['detajj_gt_3p0'])
 
     # VBF signal region without the dphijj cut
     if "sr_vbf_no_dphijj_cut" in cfg.RUN.EXTRA_REGIONS:
-        regions['sr_vbf_no_dphijj_cut'] = clean_lists(regions['sr_vbf_no_veto_all'], ['dphijj'])
+        regions['sr_vbf_no_dphijj_cut'] = remove_items(regions['sr_vbf_no_veto_all'], ['dphijj'])
 
     if cfg.RUN.TRIGGER_STUDY:
         # Trigger studies
         # num = numerator, den = denominator
         # Single Mu region: Remove mjj cut, add SingleMu trigger, toggle MET trigger
         for cut in ['two_central_jets', 'one_jet_forward_one_jet_central', 'two_hf_jets']:
-            regions[f"tr_1m_num_{cut}"] = add_lists(clean_lists(regions['cr_1m_vbf'], ['recoil']), ['trig_mu', 'mu_pt_trig_safe', cut])
-            regions[f"tr_1m_den_{cut}"] = clean_lists(regions[f"tr_1m_num_{cut}"], ['trig_met'])
+            regions[f"tr_1m_num_{cut}"] = append_items(remove_items(regions['cr_1m_vbf'], ['recoil']), ['trig_mu', 'mu_pt_trig_safe', cut])
+            regions[f"tr_1m_den_{cut}"] = remove_items(regions[f"tr_1m_num_{cut}"], ['trig_met'])
 
-            regions[f"tr_2m_num_{cut}"] = add_lists(clean_lists(regions['cr_2m_vbf'], ['mjj']), ['trig_mu', 'mu_pt_trig_safe', cut])
-            regions[f"tr_2m_den_{cut}"] = clean_lists(regions[f"tr_2m_num_{cut}"], ['trig_met'])
+            regions[f"tr_2m_num_{cut}"] = append_items(remove_items(regions['cr_2m_vbf'], ['mjj']), ['trig_mu', 'mu_pt_trig_safe', cut])
+            regions[f"tr_2m_den_{cut}"] = remove_items(regions[f"tr_2m_num_{cut}"], ['trig_met'])
 
-        regions[f"tr_g_notrig_num"] = clean_lists(regions['cr_g_vbf'], ['recoil', 'photon_pt'])
-        regions[f"tr_g_notrig_den"] = clean_lists(regions[f"tr_g_notrig_num"], ['trig_photon'])
+        regions[f"tr_g_notrig_num"] = remove_items(regions['cr_g_vbf'], ['recoil', 'photon_pt'])
+        regions[f"tr_g_notrig_den"] = remove_items(regions[f"tr_g_notrig_num"], ['trig_photon'])
 
         for trgname in cfg.TRIGGERS.HT.GAMMAEFF:
-            regions[f'tr_g_{trgname}_num'] = add_lists(regions[f"tr_g_notrig_num"], [trgname])
-            regions[f'tr_g_{trgname}_den'] = add_lists(regions[f"tr_g_notrig_den"], [trgname])
+            regions[f'tr_g_{trgname}_num'] = append_items(regions[f"tr_g_notrig_num"], [trgname])
+            regions[f'tr_g_{trgname}_den'] = append_items(regions[f"tr_g_notrig_den"], [trgname])
 
-            regions[f'tr_g_{trgname}_photon_pt_trig_cut_num'] = add_lists(regions[f"tr_g_notrig_num"], [trgname, 'photon_pt_trig'])
-            regions[f'tr_g_{trgname}_photon_pt_trig_cut_den'] = add_lists(regions[f"tr_g_notrig_den"], [trgname, 'photon_pt_trig'])
+            regions[f'tr_g_{trgname}_photon_pt_trig_cut_num'] = append_items(regions[f"tr_g_notrig_num"], [trgname, 'photon_pt_trig'])
+            regions[f'tr_g_{trgname}_photon_pt_trig_cut_den'] = append_items(regions[f"tr_g_notrig_den"], [trgname, 'photon_pt_trig'])
 
     return regions
 

From 9bfa8f8231a5fd91580883e71f3b682f8ea0a58e Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 18:13:30 +0100
Subject: [PATCH 18/43] add doc strings

---
 bucoffea/vbfhinv/definitions.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/bucoffea/vbfhinv/definitions.py b/bucoffea/vbfhinv/definitions.py
index d2010e40f..d00903115 100644
--- a/bucoffea/vbfhinv/definitions.py
+++ b/bucoffea/vbfhinv/definitions.py
@@ -274,11 +274,12 @@ def vbfhinv_accumulator(cfg):
 
 def vbfhinv_regions(cfg):
     # 'inclusive'    # 'veto_b',
-    def remove_items(orig_list, to_remove):
-        return list(filter(lambda x: x not in to_remove, orig_list))
-    
-    def append_items(orig_list, to_add):
-        return list(orig_list+ to_add)
+    def remove_items(original_list, items_to_remove):
+        """Return a new list with items removed from the original list."""
+        return list(filter(lambda x: x not in items_to_remove, original_list))
+    def append_items(original_list, items_to_add):
+        """Return a new list with items appended to the original list."""
+        return list(original_list+ items_to_add)
     
     common_cuts = [
         'filt_met',
@@ -393,6 +394,7 @@ def append_items(orig_list, to_add):
     # with lepton veto weights
     to_add = ['met_sr', 'mindphijm']
     to_remove = ['veto_muo', 'veto_tau', 'veto_ele', 'mindphijr', 'recoil']
+    # For each 'sr' region, create a new region with the same elements, removing the cuts in to_remove and adding those in to_add
     regions.update(dict([(f"{region}_no_veto_all", append_items(remove_items(regions[region], to_remove),to_add)) for region in regions.keys() if region.startswith("sr_")]))
 
     # Region with high detajj cut

From 08950df8446f96fb8a4f5e15c9f4eaf5f79fd291 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 21 Mar 2023 18:16:48 +0100
Subject: [PATCH 19/43] add default argument to run_quick

---
 bucoffea/scripts/run_quick.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/bucoffea/scripts/run_quick.py b/bucoffea/scripts/run_quick.py
index 96624aca1..2289d117e 100755
--- a/bucoffea/scripts/run_quick.py
+++ b/bucoffea/scripts/run_quick.py
@@ -10,21 +10,27 @@
 def parse_commandline():
 
     parser = argparse.ArgumentParser()
-    parser.add_argument('processor', type=str, help='The processor to be run. (monojet or vbfhinv)')
+    parser.add_argument('processor', nargs='?', default='vbfhinv', type=str, help='The processor to be run. (monojet or vbfhinv)')
     args = parser.parse_args()
 
     return args
 
 def main():
     fileset = {
-        "VBF_HToInvisible_M125_TuneCP5_withDipoleRecoil_2018" : [
+        "VBF_H_2018" : [
             "/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/VBF_HToInvisible_M125_TuneCP5_withDipoleRecoil_13TeV_powheg_pythia8/VBF_HToInvisible_M125_withDipoleRecoil_pow_pythia8_2018/230303_180313/0000/tree_1.root"
         ],
-        "MET_ver1_2018D" : [
-            "/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/MET/MET_ver1_2018D/230306_160535/0000/tree_10.root"
+        "ggH_2018" : [
+            "/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/GluGlu_HToInvisible_M125_HiggspTgt190_TuneCP5_13TeV_powheg_pythia8/GluGlu_HToInvisible_M125_HiggspTgt190_pow_pythia8_2018/230303_181428/0000/tree_1.root"
         ],
-        "Z1JetsToNuNu_M-50_LHEFilterPtZ-50To150_MatchEWPDG20-amcatnloFXFX_2018" : [
-            "/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/Z1JetsToNuNu_M-50_LHEFilterPtZ-50To150_MatchEWPDG20_TuneCP5_13TeV-amcatnloFXFX-pythia8/Z1JetsToNuNu_M-50_LHEFilterPtZ-50To150_MatchEWPDG20-amcatnloFXFX_2018/230303_183854/0000/tree_23.root"
+        "MET_2018D" : [
+            "/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/MET/MET_ver1_2018D/230306_160535/0000/tree_2.root"
+        ],
+        "ZToNuNu_2018" : [
+            "/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/Z1JetsToNuNu_M-50_LHEFilterPtZ-50To150_MatchEWPDG20_TuneCP5_13TeV-amcatnloFXFX-pythia8/Z1JetsToNuNu_M-50_LHEFilterPtZ-50To150_MatchEWPDG20-amcatnloFXFX_2018/230303_183854/0000/tree_1.root"
+        ],
+        "ZToLL_2018" : [
+            "/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/EWKZ2Jets_ZToLL_M-50_TuneCP5_withDipoleRecoil_13TeV-madgraph-pythia8/EWKZ2Jets_ZToLL_M-50_withDipoleRecoil-mg_2018/230303_182343/0000/tree_1.root",
         ]
     }
 
@@ -67,6 +73,7 @@ def main():
                                     treename='Runs' if args.processor=='sumw' else 'Events',
                                     processor_instance=processorInstance,
                                     executor=processor.futures_executor,
+                                    # executor=processor.iterative_executor,
                                     executor_args={'workers': 1, 'flatten': True},
                                     chunksize=50000,
                                     )

From 84c83104022774d1ea53d58647201f54a247e050 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Wed, 22 Mar 2023 10:49:11 +0100
Subject: [PATCH 20/43] update gitignore

---
 .gitignore | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index d825760ef..53b71378b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,9 +3,12 @@ external
 __pycache__
 *.egg-info
 *.coffea
+*.pkl
+*.root
 *.pdf
 *.png
 *cutflow*.txt
 monitor_log.txt
 submission/
-pkl/
\ No newline at end of file
+pkl/
+INFO.txt
\ No newline at end of file

From 18793db5fe8bf4412cdee301504f480a71041d77 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 12:27:03 +0100
Subject: [PATCH 21/43] update job splitting

---
 bucoffea/execute/buexec                    |  6 +-
 bucoffea/execute/dataset_split.py          | 38 +++++++++++
 bucoffea/execute/validate_dataset_split.py | 78 ++++++++++++++++++++++
 3 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 bucoffea/execute/dataset_split.py
 create mode 100644 bucoffea/execute/validate_dataset_split.py

diff --git a/bucoffea/execute/buexec b/bucoffea/execute/buexec
index 2bc5c7245..f9df23773 100755
--- a/bucoffea/execute/buexec
+++ b/bucoffea/execute/buexec
@@ -14,6 +14,7 @@ from coffea.util import save
 from bucoffea.execute.dataset_definitions import (files_from_ac,
                                                   files_from_das,
                                                   files_from_eos)
+from bucoffea.execute.dataset_split import get_files_per_job
 from bucoffea.helpers import bucoffea_path, vo_proxy_path, xrootd_format
 from bucoffea.helpers.condor import condor_submit
 from bucoffea.helpers.git import git_rev_parse, git_diff
@@ -216,7 +217,10 @@ def do_submit(args):
         print(f"Writing submission files for dataset: {dataset}.")
 
         if args.filesperjob:
-            nchunk = math.ceil(len(files)/args.filesperjob)
+            filesperjob = get_files_per_job(dataset,time_per_job = 3)
+            if filesperjob<0:
+                filesperjob = args.filesperjob
+            nchunk = math.ceil(len(files)/filesperjob)
             chunks = chunk_by_files(files, nchunk=int(nchunk))
         else:
             chunks = chunk_by_events(files, chunksize=args.eventsperjob, workers=8)
diff --git a/bucoffea/execute/dataset_split.py b/bucoffea/execute/dataset_split.py
new file mode 100644
index 000000000..669103886
--- /dev/null
+++ b/bucoffea/execute/dataset_split.py
@@ -0,0 +1,38 @@
+def get_files_per_job(dataset,time_per_job = 3):
+    """Files per job for `dataset`: table rate (files per hour) times `time_per_job`; -1 if no unique entry."""
+    filesperjob_dict ={ 'GluGlu_HToInvisible': 1,
+                        'ttH_HToInvisible': 1,
+                        'VBF_HToInvisible': 3,
+                        'WminusH_WToQQ_HToInvisible': 8,
+                        'WplusH_WToQQ_HToInvisible': 3,
+                        'ZH_ZToQQ_HToInvisible': 3,
+                        'DYJetsToLL_LHEFilterPtZ-0To50': 12,
+                        'DYJetsToLL_LHEFilterPtZ-50To100': 10,
+                        'DYJetsToLL_LHEFilterPtZ-100To250': 5,
+                        'DYJetsToLL_LHEFilterPtZ-250To400': 1,
+                        'DYJetsToLL_LHEFilterPtZ-400To650': 1,
+                        'DYJetsToLL_LHEFilterPtZ-650ToInf': 1,
+                        'EWKWMinus2Jets_WToLNu': 4,
+                        'EWKWPlus2Jets_WToLNu': 3,
+                        'EWKZ2Jets_ZToLL': 3,
+                        'EWKZ2Jets_ZToNuNu': 4,
+                        'GJets_DR-0p4': 30,
+                        'VBFGamma_5f_DipoleRecoil-mg': 10,
+                        'WJetsToLNu_Pt-100To250': 4,
+                        'WJetsToLNu_Pt-250To400': 1,
+                        'WJetsToLNu_Pt-400To600': 1,
+                        'WJetsToLNu_Pt-600ToInf': 1,
+                        'Z1JetsToNuNu': 2,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-50To150': 20,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-150To250': 1,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400': 1,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-400ToInf': 1,
+                        'EGamma': 12,
+                        'MET': 5,
+                        }
+    # A table key applies when it is a substring of the dataset name; zero or several hits are ambiguous
+    keys_contained = [key for key in filesperjob_dict if key in dataset]
+    if len(keys_contained)!=1:
+        return -1
+    # Never return 0: the buexec caller divides by this value and only treats negative results as "unknown"
+    return max(1, int(round(filesperjob_dict[keys_contained[0]] * time_per_job)))
\ No newline at end of file
diff --git a/bucoffea/execute/validate_dataset_split.py b/bucoffea/execute/validate_dataset_split.py
new file mode 100644
index 000000000..8b6e338ff
--- /dev/null
+++ b/bucoffea/execute/validate_dataset_split.py
@@ -0,0 +1,78 @@
+import os, glob
+import numpy as np
+
+submitted = []  # every input file name listed in any submission input_* file
+info = {}       # per-sample bookkeeping: file counts and, when timing exists, per-file time statistics
+nsub = 0        # running sum of the estimated number of jobs over all samples
+samples = ['GluGlu_HToInvisible',
+            'ttH_HToInvisible',
+            'VBF_HToInvisible',
+            'WminusH_WToQQ_HToInvisible',
+            'WplusH_WToQQ_HToInvisible',
+            'ZH_ZToQQ_HToInvisible',
+            'DYJetsToLL_LHEFilterPtZ-0To50',
+            'DYJetsToLL_LHEFilterPtZ-50To100',
+            'DYJetsToLL_LHEFilterPtZ-100To250',
+            'DYJetsToLL_LHEFilterPtZ-250To400',
+            'DYJetsToLL_LHEFilterPtZ-400To650',
+            'DYJetsToLL_LHEFilterPtZ-650ToInf',
+            'EWKWMinus2Jets_WToLNu',
+            'EWKWPlus2Jets_WToLNu',
+            'EWKZ2Jets_ZToLL',
+            'EWKZ2Jets_ZToNuNu',
+            'GJets_DR-0p4',
+            'VBFGamma_5f_DipoleRecoil-mg',
+            'WJetsToLNu_Pt-100To250',
+            'WJetsToLNu_Pt-250To400',
+            'WJetsToLNu_Pt-400To600',
+            'WJetsToLNu_Pt-600ToInf',
+            'Z1JetsToNuNu',
+            'Z2JetsToNuNu_M-50_LHEFilterPtZ-50To150',
+            'Z2JetsToNuNu_M-50_LHEFilterPtZ-150To250',
+            'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400',
+            'Z2JetsToNuNu_M-50_LHEFilterPtZ-400ToInf',
+            'EGamma',
+            'MET',
+]
+for sample in samples:
+    info[sample] = {}
+    temp = []
+    times = []
+    for fname in glob.glob("submission/PFNANO_V9_17Feb23_PostNanoTools/files/input_*"+sample+"*txt"):
+        with open(fname) as f_:
+            files = [x.replace('\n','') for x in f_.readlines()]
+            nfiles = len(files)
+            temp += files
+        if not os.path.exists(fname.replace('input_','err_')): continue
+        with open(fname.replace('input_','err_')) as f_:
+            t = [x.replace('\n','') for x in f_.readlines() if 'user' in x]
+            if len(t)==0 or nfiles==0: continue  # no 'user' timing line, or empty input list (would divide by zero)
+            t = t[0].replace('user\t','')
+            times.append(float(t[:t.find('m')])/nfiles)  # number before 'm' (presumably minutes) per input file
+    info[sample]['nfiles'] = len(temp)
+    info[sample]['nt'] = len(times)
+    submitted += temp  # recorded before the skip below so listed files still count as submitted
+    if len(temp)==0 or len(times)==0:  # nothing to average; np.min/np.max raise on empty input
+        print(sample, temp,times, glob.glob("submission/PFNANO_V9_17Feb23_PostNanoTools/files/input_*"+sample+"*txt"))
+        continue
+    info[sample]['times'] = np.average(times)
+    info[sample]['err'] = np.std(times)
+    info[sample]['min'] = np.min(times)
+    info[sample]['max'] = np.max(times)
+    info[sample]['exp'] = np.max(times)*info[sample]['nfiles']  # worst-case total time for the sample
+    info[sample]['split'] = max(1, info[sample]['exp']/60)  # at least one job
+    info[sample]['filesperjob'] = max(1, 1.*info[sample]['nfiles']/info[sample]['split'])
+    nsub += info[sample]['split']
+
+all_files = glob.glob("/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/*/*/*/*/*root")
+print(f"Non analysed {len(list(set(all_files)-set(submitted)))} out of {len(all_files)} files")
+
+hours = 1
+nsub_tot = 0
+for sample in (s for s in info if 'split' in info[s]):  # samples skipped above carry no statistics
+    nsub = int(round(max(1,info[sample]['split']/hours),0))
+    nsub_tot += nsub
+    n_files = int(round(info[sample]['filesperjob']*hours,0))
+    print(f"Split by {n_files} file can be used for sample:{sample} to produce {nsub} jobs")
+print("nsub", nsub_tot)
+

From f595436271526abf2b7af015faaf768e1b7762f2 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 12:34:07 +0100
Subject: [PATCH 22/43] uniform path creating and info storing

---
 bucoffea/plot/plotter.py                      |  9 ++--
 .../stack_plot/get_hf_noise_estimate.py       | 45 +++++++------------
 .../plot/studies/stack_plot/plot_data_mc.py   | 29 ++----------
 bucoffea/plot/util.py                         | 14 ++++++
 4 files changed, 37 insertions(+), 60 deletions(-)

diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index 90b0b6c2d..d36ae79ef 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -205,8 +205,8 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
         # Otherwise, take the one from the relevant output directory
         if qcd_file:
             qcdfilepath = qcd_file
-        else:    
-            qcdfilepath = f'output/{outtag}/hf_estimate/vbfhinv_hf_estimate.root'
+        else:
+            qcdfilepath = pjoin(outtag,'hf_estimate','vbfhinv_hf_estimate.root')
         
         # Make sure that the HF-noise estimate ROOT file points to a valid path
         assert os.path.exists(qcdfilepath), f"HF-noise file cannot be found: {qcdfilepath}"
@@ -426,9 +426,8 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
                 transform=ax.transAxes
                )
 
-    outdir = f'./output/{outtag}/{data_region}'
-    if not os.path.exists(outdir):
-        os.makedirs(outdir)
+    outdir = pjoin(outtag,data_region)
+    os.makedirs(outdir, exist_ok=True)  # no shell involved: works for paths with spaces or metacharacters
     
     # For each file format (PDF, PNG etc.), save the plot
     for fformat in fformats:
diff --git a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
index 50db4594d..863cded0a 100755
--- a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
+++ b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
@@ -9,7 +9,7 @@
 from datetime import datetime
 
 from matplotlib import pyplot as plt
-from bucoffea.plot.util import merge_extensions, merge_datasets, scale_xs_lumi
+from bucoffea.plot.util import merge_extensions, merge_datasets, scale_xs_lumi, dump_info
 from coffea import hist
 from klepto.archives import dir_archive
 from pprint import pprint
@@ -30,16 +30,7 @@
     'MET.*' : "Data"
 }
 
-def parse_cli():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('inpath', help='Path to the merged input accumulator.')
-    parser.add_argument('--years', nargs='*', type=int, default=[2017,2018], help='Years to run.')
-    parser.add_argument('--region', default='cr_vbf_qcd', help='Name of the HF-noise enriched control region as defined in the VBF H(inv) processor.')
-    parser.add_argument('--distribution', default='.*', help='Regex specifying the list of distributions to run.')
-    args = parser.parse_args()
-    return args
-
-def get_hf_noise_estimate(acc, outtag, outrootfile, distribution, years=[2017, 2018], region_name='cr_vbf_qcd'):
+def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2018], region_name='cr_vbf_qcd'):
     '''
     Calculate the noise template due to forward-jet noise (HF-noise) in VBF signal region.
     '''
@@ -63,10 +54,6 @@ def get_hf_noise_estimate(acc, outtag, outrootfile, distribution, years=[2017, 2
         new_ax = hist.Bin('dphi', r'$\Delta\phi_{TK,PF}$', new_bins)
         h = h.rebin('dphi', new_ax)
 
-    outdir = f'./output/{outtag}/hf_estimate'
-    if not os.path.exists(outdir):
-        os.makedirs(outdir)
-
     # Get data and MC yields in the QCD CR
     h = h.integrate('region', region_name)
     if distribution== 'particlenet_score':
@@ -178,35 +165,33 @@ def get_hf_noise_estimate(acc, outtag, outrootfile, distribution, years=[2017, 2
 
         outrootfile[f'hf_estimate_{distribution}_{year}'] = (sumw, xedges)
 
+def commandline():
+    """Parse and return the command-line arguments of the HF-noise estimation script."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument('inpath', help='Path to the merged input accumulator.')
+    cli.add_argument('--years', nargs='*', type=int, default=[2017,2018], help='Years to run.')
+    cli.add_argument('--region', default='cr_vbf_qcd', help='Name of the HF-noise enriched control region as defined in the VBF H(inv) processor.')
+    cli.add_argument('--distribution', default='.*', help='Regex specifying the list of distributions to run.')
+    return cli.parse_args()
+
 def main():
-    args = parse_cli()
+    args = commandline()
     inpath = args.inpath
     acc = dir_archive(inpath)
     acc.load('sumw')
     acc.load('sumw2')
 
-    outtag = re.findall('merged_.*', inpath)[0].replace('/','')
-    outdir = f'./output/{outtag}/hf_estimate'
-    if not os.path.exists(outdir):
-        os.makedirs(outdir)
+    outdir = pjoin('./output/',args.inpath.replace('..','').replace('/',''),'hf_estimate')
+    dump_info(args, outdir)
 
     outrootpath = pjoin(outdir, 'vbfhinv_hf_estimate.root')
     outrootfile = uproot.recreate(outrootpath)
     print(f'ROOT file initiated: {outrootpath}')
 
-    # Store the command line arguments in the INFO.txt file
-    infofile = pjoin(outdir, 'INFO.txt')
-    with open(infofile, 'w+') as f:
-        f.write(f'QCD estimation most recently ran at: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}\n')
-        f.write('Command line arguments:\n\n')
-        cli = vars(args)
-        for arg, val in cli.items():
-            f.write(f'{arg}: {val}\n')
-
     for distribution in distributions['sr_vbf']:
         if not re.match(args.distribution, distribution):
             continue
-        get_hf_noise_estimate(acc, outtag, 
+        get_hf_noise_estimate(acc, outdir,
             outrootfile, 
             distribution=distribution, 
             years=args.years,
diff --git a/bucoffea/plot/studies/stack_plot/plot_data_mc.py b/bucoffea/plot/studies/stack_plot/plot_data_mc.py
index 7bd8797c3..df20f8320 100755
--- a/bucoffea/plot/studies/stack_plot/plot_data_mc.py
+++ b/bucoffea/plot/studies/stack_plot/plot_data_mc.py
@@ -7,11 +7,10 @@
 from klepto.archives import dir_archive
 from pprint import pprint
 from distributions import distributions
-from datetime import datetime
 from tqdm import tqdm
 
 from bucoffea.plot.plotter import plot_data_mc
-from bucoffea.plot.util import get_mc_scales
+from bucoffea.plot.util import get_mc_scales, dump_info
 
 pjoin = os.path.join
 
@@ -22,7 +21,8 @@ def make_plot(args):
     acc.load('sumw')
     acc.load('sumw2')
 
-    outtag = re.findall('merged_.*', args.inpath)[0].replace('/','')
+    outtag = pjoin('./output/',args.inpath.replace('..','').replace('/',''))
+    dump_info(args, outtag)
 
     for year in args.years:
         data = {
@@ -108,30 +108,9 @@ def commandline():
     args = parser.parse_args()
     return args
 
-def dump_info(args):
-    """
-    Function to dump information about the command line arguments to an INFO.txt file.
-    """
-    outdir = pjoin('./output/',list(filter(lambda x:x,args.inpath.split('/')))[-1])
-
-    # Store the command line arguments in the INFO.txt file
-    try:
-        os.makedirs(outdir)
-    except FileExistsError:
-        pass
-    
-    infofile = pjoin(outdir, 'INFO.txt')
-    with open(infofile, 'w+') as f:
-        f.write(f'Plot script run at: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}\n')
-        f.write('Command line arguments:\n\n')
-        cli = vars(args)
-        for arg, val in cli.items():
-            f.write(f'{arg}: {val}\n')
-
 def main():
     args = commandline()
-    dump_info(args)
-    make_plot(args)    
+    make_plot(args)
 
 if __name__ == "__main__":
     main()
diff --git a/bucoffea/plot/util.py b/bucoffea/plot/util.py
index 1fc701716..95033265d 100644
--- a/bucoffea/plot/util.py
+++ b/bucoffea/plot/util.py
@@ -541,3 +541,17 @@ def load_and_merge(inpath, distributions):
         acc[distribution] = merge_datasets(acc[distribution])
         acc[distribution].axis('dataset').sorting = 'integral'
     return acc
+
+
+def dump_info(args,outdir):
+    """
+    Create `outdir` (if missing) and write INFO.txt there: a timestamp line followed by one
+    `name: value` line per attribute of `args` (e.g. an argparse namespace).
+    """
+    from datetime import datetime
+    os.makedirs(outdir, exist_ok=True)  # no shell involved: works for paths with spaces or metacharacters
+    with open(pjoin(outdir, 'INFO.txt'), 'w+') as f:
+        f.write(f'Plot script run at: {datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}\n')
+        f.write('Command line arguments:\n\n')
+        for arg, val in vars(args).items():
+            f.write(f'{arg}: {val}\n')
\ No newline at end of file

From 9e77f06c32d08ea6c3dd0f5dad410089c97f3427 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 12:36:30 +0100
Subject: [PATCH 23/43] remove duplicate ylims

---
 bucoffea/plot/plotter.py                      |  4 ++--
 .../plot/studies/stack_plot/distributions.py  | 19 +------------------
 2 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index d36ae79ef..826b5870a 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -76,8 +76,8 @@
 }
 
 ylims = {
-    'ak4_eta0' : (1e-3,1e8),
-    'ak4_eta1' : (1e-3,1e8),
+    'ak4_eta0' : (1e-3,1e5),
+    'ak4_eta1' : (1e-3,1e5),
     'ak4_nef0' : (1e0,1e8),
     'ak4_nef1' : (1e0,1e8),
     'ak4_nhf0' : (1e0,1e8),
diff --git a/bucoffea/plot/studies/stack_plot/distributions.py b/bucoffea/plot/studies/stack_plot/distributions.py
index a13fb298a..8a7813cd2 100755
--- a/bucoffea/plot/studies/stack_plot/distributions.py
+++ b/bucoffea/plot/studies/stack_plot/distributions.py
@@ -61,21 +61,4 @@
     'dphijr' : Bin("dphi", r"$min\Delta\phi(j,recoil)$", 50, 0, 3.5),
     'extra_ak4_mult' : Bin("multiplicity", r"Additional AK4 Jet Multiplicity", 10, -0.5, 9.5),
     'recoil' : Bin('recoil','Recoil (GeV)', recoil_bins_2016),
-}
-
-ylims = {
-    'ak4_eta0' : (1e-3,1e8),
-    'ak4_eta1' : (1e-3,1e8),
-    'ak4_nef0' : (1e0,1e8),
-    'ak4_nef1' : (1e0,1e8),
-    'ak4_nhf0' : (1e0,1e8),
-    'ak4_nhf1' : (1e0,1e8),
-    'ak4_chf0' : (1e0,1e8),
-    'ak4_chf1' : (1e0,1e8),
-    'vecb' : (1e-1,1e9),
-    'vecdphi' : (1e0,1e9),
-    'dphitkpf' : (1e0,1e9),
-    'met' : (1e-3,1e5),
-    'ak4_mult' : (1e-1,1e8),
-    'particlenet_score' : (1e-1,1e8),
-}
+}
\ No newline at end of file

From d9c26bd238d910bd175dbd39b9b89469d55c5e36 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 12:43:00 +0100
Subject: [PATCH 24/43] generalise list of distributions

---
 .../plot/studies/stack_plot/distributions.py  | 40 ++++++++-----------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/bucoffea/plot/studies/stack_plot/distributions.py b/bucoffea/plot/studies/stack_plot/distributions.py
index 8a7813cd2..b7fc25efb 100755
--- a/bucoffea/plot/studies/stack_plot/distributions.py
+++ b/bucoffea/plot/studies/stack_plot/distributions.py
@@ -4,33 +4,27 @@
 
 Bin = hist.Bin
 
+def obj_variables(object_name, indices, vars, extravars=None):
+    """Return '<object_name>_<var><index>' names; `extravars` extend `vars`; empty `indices` means no suffix."""
+    suffixes = list(indices) or ['']
+    # Build a new list instead of `vars += extravars`, which would modify the caller's list in place
+    names = list(vars) + list(extravars or [])
+    return [f"{object_name}_{var}{idx}" for var in names for idx in suffixes]
 
-common_distributions = [
-    'mjj',
-    'detajj',
-    'dphijj',
-    #'cnn_score',
-    #'dnn_score',
-    'particlenet_score',
-    'recoil',
-    'ak4_eta0',
-    'ak4_eta1',
-    'ak4_pt0',
-    'ak4_pt1',
-    # 'ak4_central_eta',
-    # 'ak4_forward_eta',
-    # 'dphijr',
-]
+
+common_distributions = [ 'mjj', 'detajj', 'dphijj', 'recoil', 'dphijr', 'particlenet_score']
+common_distributions += obj_variables(object_name='ak4', indices=[0,1], vars=['eta','pt'])
+# common_distributions += obj_variables(object_name='ak4', indices=[''], vars=['central_eta','forward_eta'])
 
 # Distributions to plot for each region
 distributions = {
-    'sr_vbf' : common_distributions + ['ak4_nef0', 'ak4_nef1', 'ak4_nhf0', 'ak4_nhf1', 'ak4_chf0', 'ak4_chf1'],
-    'sr_vbf_nodijetcut' : common_distributions + ['ak4_nef0', 'ak4_nef1', 'ak4_nhf0', 'ak4_nhf1', 'ak4_chf0', 'ak4_chf1'],
-    'cr_1m_vbf' : common_distributions + ['muon_pt', 'muon_eta', 'muon_phi', 'muon_mt'],
-    'cr_1e_vbf' : common_distributions + ['electron_pt', 'electron_eta', 'electron_phi', 'electron_mt'],
-    'cr_2m_vbf' : common_distributions + ['muon_pt0', 'muon_eta0', 'muon_phi0', 'muon_pt1', 'muon_eta1', 'muon_phi1', 'dimuon_mass'],
-    'cr_2e_vbf' : common_distributions + ['electron_pt0', 'electron_eta0', 'electron_phi0', 'electron_pt1', 'electron_eta1', 'electron_phi1', 'dielectron_mass'],
-    'cr_g_vbf'  : common_distributions + ['photon_pt0', 'photon_eta0', 'photon_phi0'],
+    'sr_vbf'    :         common_distributions + obj_variables(object_name='ak4',       indices=[0,1], vars=['nef','nhf','chf']),
+    'sr_vbf_nodijetcut' : common_distributions + obj_variables(object_name='ak4',       indices=[0,1], vars=['nef','nhf','chf']),
+    'cr_1m_vbf' :         common_distributions + obj_variables(object_name='muon',      indices=[],    vars=['pt', 'eta', 'phi'], extravars=['mt']),
+    'cr_1e_vbf' :         common_distributions + obj_variables(object_name='electron',  indices=[],    vars=['pt', 'eta', 'phi'], extravars=['mt']),
+    'cr_2m_vbf' :         common_distributions + obj_variables(object_name='muon',      indices=[0,1], vars=['pt', 'eta', 'phi']) + ['dimuon_mass'],
+    'cr_2e_vbf' :         common_distributions + obj_variables(object_name='electron',  indices=[0,1], vars=['pt', 'eta', 'phi']) + ['dielectron_mass'],
+    'cr_g_vbf'  :         common_distributions + obj_variables(object_name='photon',    indices=[0],   vars=['pt', 'eta', 'phi']),
 } 
 
 recoil_bins_2016 = [ 250,  280,  310,  340,  370,  400,  430,  470,  510, 550,  590,  640,  690,  740,  790,  840,  900,  960, 1020, 1090, 1160, 1250, 1400]

From a1d5357e5151cb5c3e6569d233c5f968fafb7959 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 12:50:07 +0100
Subject: [PATCH 25/43] fix binning difference in qcd estimation

---
 bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
index 863cded0a..611820f30 100755
--- a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
+++ b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
@@ -42,6 +42,8 @@ def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2
     h = merge_datasets(h)
 
     overflow = 'none'
+    if distribution == 'mjj':
+        overflow = 'over'
 
     # Rebin if neccessary
     if distribution in binnings.keys():

From d048e3303f02702ef9d0bceb72da20988270b912 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 12:56:31 +0100
Subject: [PATCH 26/43] remove duplicate

---
 .../studies/stack_plot/get_hf_noise_estimate.py    | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
index 611820f30..1ad5b7962 100755
--- a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
+++ b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
@@ -14,22 +14,10 @@
 from klepto.archives import dir_archive
 from pprint import pprint
 from distributions import distributions, binnings
+from bucoffea.plot.plotter import legend_labels
 
 pjoin = os.path.join
 
-legend_labels = {
-    'DY.*' : "QCD Z$\\rightarrow\\ell\\ell$",
-    'EWKZ.*ZToLL.*' : "EWK Z$\\rightarrow\\ell\\ell$",
-    'WN*J.*LNu.*' : "QCD W$\\rightarrow\\ell\\nu$",
-    'EWKW.*LNu.*' : "EWK W$\\rightarrow\\ell\\nu$",
-    'ZN*JetsToNuNu.*.*' : "QCD Z$\\rightarrow\\nu\\nu$",
-    'EWKZ.*ZToNuNu.*' : "EWK Z$\\rightarrow\\nu\\nu$",
-    'QCD.*' : "QCD",
-    'Top.*' : "Top quark",
-    'Diboson.*' : "WW/WZ/ZZ",
-    'MET.*' : "Data"
-}
-
 def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2018], region_name='cr_vbf_qcd'):
     '''
     Calculate the noise template due to forward-jet noise (HF-noise) in VBF signal region.

From 45d8b7860d72068b2f286dbfda4a9f16938e9242 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 12:58:20 +0100
Subject: [PATCH 27/43] remove duplicate

---
 .../plot/studies/stack_plot/distributions.py  | 30 -------------------
 .../stack_plot/get_hf_noise_estimate.py       |  4 +--
 2 files changed, 2 insertions(+), 32 deletions(-)

diff --git a/bucoffea/plot/studies/stack_plot/distributions.py b/bucoffea/plot/studies/stack_plot/distributions.py
index b7fc25efb..ba66cc05d 100755
--- a/bucoffea/plot/studies/stack_plot/distributions.py
+++ b/bucoffea/plot/studies/stack_plot/distributions.py
@@ -25,34 +25,4 @@ def obj_variables(object_name, indices, vars, extravars=None):
     'cr_2m_vbf' :         common_distributions + obj_variables(object_name='muon',      indices=[0,1], vars=['pt', 'eta', 'phi']) + ['dimuon_mass'],
     'cr_2e_vbf' :         common_distributions + obj_variables(object_name='electron',  indices=[0,1], vars=['pt', 'eta', 'phi']) + ['dielectron_mass'],
     'cr_g_vbf'  :         common_distributions + obj_variables(object_name='photon',    indices=[0],   vars=['pt', 'eta', 'phi']),
-} 
-
-recoil_bins_2016 = [ 250,  280,  310,  340,  370,  400,  430,  470,  510, 550,  590,  640,  690,  740,  790,  840,  900,  960, 1020, 1090, 1160, 1250, 1400]
-
-binnings = {
-    'particlenet_score': Bin('score', r'DNN score', 50, 0, 1),
-    'mjj': Bin('mjj', r'$M_{jj} \ (GeV)$', [50, 100., 200., 400., 600., 900., 1200., 1500., 2000., 2750., 3500., 5000.]),
-    'ak4_pt0': Bin('jetpt',r'Leading AK4 jet $p_{T}$ (GeV)',list(range(80,600,20)) + list(range(600,1000,20)) ),
-    'ak4_pt1': Bin('jetpt',r'Trailing AK4 jet $p_{T}$ (GeV)',list(range(40,600,20)) + list(range(600,1000,20)) ),
-    'ak4_phi0' : Bin("jetphi", r"Leading AK4 jet $\phi$", 50,-np.pi, np.pi),
-    'ak4_phi1' : Bin("jetphi", r"Trailing AK4 jet $\phi$", 50,-np.pi, np.pi),
-    'ak4_eta0' : Bin("jeteta", r"Leading Jet $\eta$", 50, -5, 5),
-    'ak4_eta1' : Bin("jeteta", r"Leading Jet $\eta$", 50, -5, 5),
-    'ak4_central_eta' : Bin("jeteta", r"More Central Jet $\eta$", 50, -5, 5),
-    'ak4_forward_eta' : Bin("jeteta", r"More Forward Jet $\eta$", 50, -5, 5),
-    'ak4_nef0' : Bin('frac', 'Leading Jet Neutral EM Frac', 50, 0, 1),
-    'ak4_nef1' : Bin('frac', 'Trailing Jet Neutral EM Frac', 50, 0, 1),
-    'ak4_nhf0' : Bin('frac', 'Leading Jet Neutral Hadronic Frac', 50, 0, 1),
-    'ak4_nhf1' : Bin('frac', 'Trailing Jet Neutral Hadronic Frac', 50, 0, 1),
-    'ak4_chf0' : Bin('frac', 'Leading Jet Charged Hadronic Frac', 50, 0, 1),
-    'ak4_chf1' : Bin('frac', 'Trailing Jet Charged Hadronic Frac', 50, 0, 1),
-    # 'dphitkpf' : Bin('dphi', r'$\Delta\phi_{TK,PF}$', 50, 0, 3.5),
-    'met' : Bin('met',r'$p_{T}^{miss}$ (GeV)',list(range(0,500,50)) + list(range(500,1000,100)) + list(range(1000,2000,250))),
-    'met_phi' : Bin("phi", r"$\phi_{MET}$", 50, -np.pi, np.pi),
-    'ak4_mult' : Bin("multiplicity", r"AK4 multiplicity", 10, -0.5, 9.5),
-    'ak4_mt0' : Bin("mt", r"Leading AK4 $M_{T}$ (GeV)", 50, 0, 1000),
-    'ak4_mt1' : Bin("mt", r"Trailing AK4 $M_{T}$ (GeV)", 50, 0, 1000),
-    'dphijr' : Bin("dphi", r"$min\Delta\phi(j,recoil)$", 50, 0, 3.5),
-    'extra_ak4_mult' : Bin("multiplicity", r"Additional AK4 Jet Multiplicity", 10, -0.5, 9.5),
-    'recoil' : Bin('recoil','Recoil (GeV)', recoil_bins_2016),
 }
\ No newline at end of file
diff --git a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
index 1ad5b7962..6f66e69e8 100755
--- a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
+++ b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
@@ -13,8 +13,8 @@
 from coffea import hist
 from klepto.archives import dir_archive
 from pprint import pprint
-from distributions import distributions, binnings
-from bucoffea.plot.plotter import legend_labels
+from distributions import distributions
+from bucoffea.plot.plotter import binnings, legend_labels
 
 pjoin = os.path.join
 

From e6063b574943ab667fca368c04b00b3170e27d1f Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 13:20:40 +0100
Subject: [PATCH 28/43] update default binning

---
 bucoffea/plot/plotter.py | 98 +++++++++++++++++++++++-----------------
 1 file changed, 57 insertions(+), 41 deletions(-)

diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index 826b5870a..a0b50300f 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -31,48 +31,64 @@
 pylab.rcParams.update(params)
 
 Bin = hist.Bin
-
-recoil_bins_2016 = [ 250,  280,  310,  340,  370,  400,  430,  470,  510, 550,  590,  640,  690,  740,  790,  840,  900,  960, 1020, 1090, 1160, 1250, 1400]
+high_pt_bins = list(range(600,1000,20))
+
+bins = {
+    'mjj':           [[50, 100., 200., 400., 600., 900., 1200., 1500., 2000., 2750., 3500.]],
+    'recoil':        [[250, 280, 310, 340, 370, 400, 430, 470, 510, 550, 590, 640, 690, 740, 790, 840, 900, 960, 1020, 1090, 1160, 1250, 1400]],
+    'phi':           [50, -np.pi, np.pi],
+    'eta':           [50, -5, 5],
+    'frac':          [50, 0, 1],
+    'mt':            [list(range(0,1000,20))],
+    'met':           [list(range(0,500,50)) + list(range(500,1000,100)) + list(range(1000,2000,250))],
+    'lep_pt':        [list(range(0,600,20))],
+    'photon_pt':     [list(range(200,600,20)) + high_pt_bins],
+    'jet_pt0':       [list(range(80,600,20)) + high_pt_bins],
+    'jet_pt1':       [list(range(40,600,20)) + high_pt_bins],
+    'dilepton_mass': [30,60,120],
+    'dphi':          [50, 0, 3.5],
+    'mult':          [10, -0.5, 9.5],
+}
 
 binnings = {
-    'mjj': Bin('mjj', r'$M_{jj} \ (GeV)$', [50, 100., 200., 400., 600., 900., 1200., 1500., 2000., 2750., 3500.]),
-    'particlenet_score': Bin('score', r'DNN score', 50, 0, 1),
-    'cnn_score': Bin('score', r'CNN score', 25, 0, 1),
-    'ak4_pt0': Bin('jetpt',r'Leading AK4 jet $p_{T}$ (GeV)',list(range(80,600,20)) + list(range(600,1000,20)) ),
-    'ak4_pt1': Bin('jetpt',r'Trailing AK4 jet $p_{T}$ (GeV)',list(range(40,600,20)) + list(range(600,1000,20)) ),
-    'ak4_phi0' : Bin("jetphi", r"Leading AK4 jet $\phi$", 50,-np.pi, np.pi),
-    'ak4_phi1' : Bin("jetphi", r"Trailing AK4 jet $\phi$", 50,-np.pi, np.pi),
-    'ak4_nef0' : Bin('frac', 'Leading Jet Neutral EM Frac', 50, 0, 1),
-    'ak4_nef1' : Bin('frac', 'Trailing Jet Neutral EM Frac', 50, 0, 1),
-    'ak4_nhf0' : Bin('frac', 'Leading Jet Neutral Hadronic Frac', 50, 0, 1),
-    'ak4_nhf1' : Bin('frac', 'Trailing Jet Neutral Hadronic Frac', 50, 0, 1),
-    'ak4_chf0' : Bin('frac', 'Leading Jet Charged Hadronic Frac', 50, 0, 1),
-    'ak4_chf1' : Bin('frac', 'Trailing Jet Charged Hadronic Frac', 50, 0, 1),
-    'ak4_central_eta' : Bin("jeteta", r"More Central Jet $\eta$", 50, -5, 5),
-    'ak4_forward_eta' : Bin("jeteta", r"More Forward Jet $\eta$", 50, -5, 5),
-    'extra_ak4_mult' : Bin("multiplicity", r"Additional AK4 Jet Multiplicity", 10, -0.5, 9.5),
-    # 'dphitkpf' : Bin('dphi', r'$\Delta\phi_{TK,PF}$', 50, 0, 3.5),
-    'met' : Bin('met',r'$p_{T}^{miss}$ (GeV)',list(range(0,500,50)) + list(range(500,1000,100)) + list(range(1000,2000,250))),
-    'met_phi' : Bin("phi", r"$\phi_{MET}$", 50, -np.pi, np.pi),
-    'calomet_pt' : Bin('met',r'$p_{T,CALO}^{miss,no-\ell}$ (GeV)',list(range(0,500,50)) + list(range(500,1000,100)) + list(range(1000,2000,250))),
-    'calomet_phi' : Bin('phi',r'$\phi_{MET}^{CALO}$', 50, -np.pi, np.pi),
-    'ak4_mult' : Bin("multiplicity", r"AK4 multiplicity", 10, -0.5, 9.5),
-    'electron_pt' : hist.Bin('pt',r'Electron $p_{T}$ (GeV)',list(range(0,600,20))),
-    'electron_pt0' : hist.Bin('pt',r'Leading electron $p_{T}$ (GeV)',list(range(0,600,20))),
-    'electron_pt1' : hist.Bin('pt',r'Trailing electron $p_{T}$ (GeV)',list(range(0,600,20))),
-    'electron_mt' : hist.Bin('mt',r'Electron $M_{T}$ (GeV)',list(range(0,800,20))),
-    'muon_pt' : hist.Bin('pt',r'Muon $p_{T}$ (GeV)',list(range(0,600,20))),
-    'muon_pt0' : hist.Bin('pt',r'Leading muon $p_{T}$ (GeV)',list(range(0,600,20))),
-    'muon_pt1' : hist.Bin('pt',r'Trailing muon $p_{T}$ (GeV)',list(range(0,600,20))),
-    'muon_mt' : hist.Bin('mt',r'Muon $M_{T}$ (GeV)',list(range(0,800,20))),
-    'photon_pt0' : hist.Bin('pt',r'Photon $p_{T}$ (GeV)',list(range(200,600,20)) + list(range(600,1000,20)) ),
-    'recoil' : hist.Bin('recoil','Recoil (GeV)', recoil_bins_2016),
-    'dphijr' : Bin("dphi", r"min $\Delta\phi(j,recoil)$", 50, 0, 3.5),
-    'dimuon_mass' : hist.Bin('dilepton_mass',r'M($\mu^{+}\mu^{-}$)',30,60,120),
-    'dielectron_mass' : hist.Bin('dilepton_mass',r'M($e^{+}e^{-}$)',30,60,120),
-    'mjj_transformed' : hist.Bin('transformed', r'Rescaled $M_{jj}$', 50, -5, 5),
-    'detajj_transformed' : hist.Bin('transformed', r'Rescaled $\Delta\eta_{jj}$', 50, -5, 5),
-    'dphijj_transformed' : hist.Bin('transformed', r'Rescaled $\Delta\phi_{jj}$', 50, -5, 5),
+    'mjj':                Bin('mjj',             r'$M_{jj} \ (GeV)$',                      *bins['mjj']),
+    'particlenet_score':  Bin('score',           r'DNN score',                             *bins['frac']),
+    'cnn_score':          Bin('score',           r'CNN score',                             *bins['frac']),
+    'ak4_pt0':            Bin('jetpt',           r'Leading AK4 jet $p_{T}$ (GeV)',         *bins['jet_pt0']),
+    'ak4_pt1':            Bin('jetpt',           r'Trailing AK4 jet $p_{T}$ (GeV)',        *bins['jet_pt1']),
+    'ak4_phi0':           Bin('jetphi',          r'Leading AK4 jet $\phi$',                *bins['phi']),
+    'ak4_phi1':           Bin('jetphi',          r'Trailing AK4 jet $\phi$',               *bins['phi']),
+    'ak4_nef0':           Bin('frac',            r'Leading Jet Neutral EM Frac',           *bins['frac']),
+    'ak4_nef1':           Bin('frac',            r'Trailing Jet Neutral EM Frac',          *bins['frac']),
+    'ak4_nhf0':           Bin('frac',            r'Leading Jet Neutral Hadronic Frac',     *bins['frac']),
+    'ak4_nhf1':           Bin('frac',            r'Trailing Jet Neutral Hadronic Frac',    *bins['frac']),
+    'ak4_chf0':           Bin('frac',            r'Leading Jet Charged Hadronic Frac',     *bins['frac']),
+    'ak4_chf1':           Bin('frac',            r'Trailing Jet Charged Hadronic Frac',    *bins['frac']),
+    'ak4_central_eta':    Bin('jeteta',          r'More Central Jet $\eta$',               *bins['eta']),
+    'ak4_forward_eta':    Bin('jeteta',          r'More Forward Jet $\eta$',               *bins['eta']),
+    'extra_ak4_mult':     Bin('multiplicity',    r'Additional AK4 Jet Multiplicity',       *bins['mult']),
+    # 'dphitkpf':         Bin('dphi',            r'$\Delta\phi_{TK,PF}$',                  *bins['dphi']),
+    'met':                Bin('met',             r'$p_{T}^{miss}$ (GeV)',                  *bins['met']),
+    'met_phi':            Bin('phi',             r'$\phi_{MET}$',                          *bins['phi']),
+    'calomet_pt':         Bin('met',             r'$p_{T,CALO}^{miss,no-\ell}$ (GeV)',     *bins['met']),
+    'calomet_phi':        Bin('phi',             r'$\phi_{MET}^{CALO}$',                   *bins['phi']),
+    'ak4_mult':           Bin('multiplicity',    r'AK4 multiplicity',                      *bins['mult']),
+    'electron_pt':        Bin('pt',              r'Electron $p_{T}$ (GeV)',                *bins['lep_pt']),
+    'electron_pt0':       Bin('pt',              r'Leading electron $p_{T}$ (GeV)',        *bins['lep_pt']),
+    'electron_pt1':       Bin('pt',              r'Trailing electron $p_{T}$ (GeV)',       *bins['lep_pt']),
+    'electron_mt':        Bin('mt',              r'Electron $M_{T}$ (GeV)',                *bins['mt']),
+    'muon_pt':            Bin('pt',              r'Muon $p_{T}$ (GeV)',                    *bins['lep_pt']),
+    'muon_pt0':           Bin('pt',              r'Leading muon $p_{T}$ (GeV)',            *bins['lep_pt']),
+    'muon_pt1':           Bin('pt',              r'Trailing muon $p_{T}$ (GeV)',           *bins['lep_pt']),
+    'muon_mt':            Bin('mt',              r'Muon $M_{T}$ (GeV)',                    *bins['mt']),
+    'photon_pt0':         Bin('pt',              r'Photon $p_{T}$ (GeV)',                  *bins['photon_pt']),
+    'recoil':             Bin('recoil',          r'Recoil (GeV)',                          *bins['recoil']),
+    'dphijr':             Bin('dphi',            r'min $\Delta\phi(j,recoil)$',            *bins['dphi']),
+    'dimuon_mass':        Bin('dilepton_mass',   r'M($\mu^{+}\mu^{-}$)',                   *bins['dilepton_mass']),
+    'dielectron_mass':    Bin('dilepton_mass',   r'M($e^{+}e^{-}$)',                       *bins['dilepton_mass']),
+    'mjj_transformed':    Bin('transformed',     r'Rescaled $M_{jj}$',                     *bins['eta']),
+    'detajj_transformed': Bin('transformed',     r'Rescaled $\Delta\eta_{jj}$',            *bins['eta']),
+    'dphijj_transformed': Bin('transformed',     r'Rescaled $\Delta\phi_{jj}$',            *bins['eta']),
 }
 
 ylims = {
@@ -187,7 +203,7 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     elif distribution == 'dphitkpf':
         new_bins = [ibin.lo for ibin in h.identifiers('dphi') if ibin.lo < 2] + [3.5]
         
-        new_ax = hist.Bin('dphi', r'$\Delta\phi_{TK,PF}$', new_bins)
+        new_ax = Bin('dphi', r'$\Delta\phi_{TK,PF}$', new_bins)
         h = h.rebin('dphi', new_ax)
 
     # This sorting messes up in SR for some reason

From 04be46d11b237a34d6fe19af4e23b2facc48fe1c Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 13:37:24 +0100
Subject: [PATCH 29/43] uniform variables

---
 bucoffea/plot/plotter.py | 86 +++++++++++++++++++++++-----------------
 1 file changed, 49 insertions(+), 37 deletions(-)

diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index a0b50300f..e3681c0cf 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -50,43 +50,55 @@
     'mult':          [10, -0.5, 9.5],
 }
 
+mjj = '$M_{jj} \ (GeV)$'
+pt  ='$p_{T}$ (GeV)'
+eta ='$\eta$'
+phi ='$\phi$'
+nef = 'Neutral EM Frac'
+nhf = 'Neutral Hadron Frac'
+chf = 'Charged Hadron Frac'
+jet0= 'Leading Jet'
+jet1= 'Trailing Jet'
+
 binnings = {
-    'mjj':                Bin('mjj',             r'$M_{jj} \ (GeV)$',                      *bins['mjj']),
-    'particlenet_score':  Bin('score',           r'DNN score',                             *bins['frac']),
-    'cnn_score':          Bin('score',           r'CNN score',                             *bins['frac']),
-    'ak4_pt0':            Bin('jetpt',           r'Leading AK4 jet $p_{T}$ (GeV)',         *bins['jet_pt0']),
-    'ak4_pt1':            Bin('jetpt',           r'Trailing AK4 jet $p_{T}$ (GeV)',        *bins['jet_pt1']),
-    'ak4_phi0':           Bin('jetphi',          r'Leading AK4 jet $\phi$',                *bins['phi']),
-    'ak4_phi1':           Bin('jetphi',          r'Trailing AK4 jet $\phi$',               *bins['phi']),
-    'ak4_nef0':           Bin('frac',            r'Leading Jet Neutral EM Frac',           *bins['frac']),
-    'ak4_nef1':           Bin('frac',            r'Trailing Jet Neutral EM Frac',          *bins['frac']),
-    'ak4_nhf0':           Bin('frac',            r'Leading Jet Neutral Hadronic Frac',     *bins['frac']),
-    'ak4_nhf1':           Bin('frac',            r'Trailing Jet Neutral Hadronic Frac',    *bins['frac']),
-    'ak4_chf0':           Bin('frac',            r'Leading Jet Charged Hadronic Frac',     *bins['frac']),
-    'ak4_chf1':           Bin('frac',            r'Trailing Jet Charged Hadronic Frac',    *bins['frac']),
-    'ak4_central_eta':    Bin('jeteta',          r'More Central Jet $\eta$',               *bins['eta']),
-    'ak4_forward_eta':    Bin('jeteta',          r'More Forward Jet $\eta$',               *bins['eta']),
-    'extra_ak4_mult':     Bin('multiplicity',    r'Additional AK4 Jet Multiplicity',       *bins['mult']),
-    # 'dphitkpf':         Bin('dphi',            r'$\Delta\phi_{TK,PF}$',                  *bins['dphi']),
+    'mjj':                Bin('mjj',             f'{mjj}',                                 *bins['mjj']),
+    'particlenet_score':  Bin('score',           f'DNN score',                             *bins['frac']),
+    'cnn_score':          Bin('score',           f'CNN score',                             *bins['frac']),
+    'ak4_pt0':            Bin('jetpt',           f'{jet0} {pt}',                           *bins['jet_pt0']),
+    'ak4_pt1':            Bin('jetpt',           f'{jet1} {pt}',                           *bins['jet_pt1']),
+    'ak4_eta0':           Bin('jeteta',          f'{jet0} {eta}',                          *bins['eta']),
+    'ak4_eta1':           Bin('jeteta',          f'{jet1} {eta}',                          *bins['eta']),
+    'ak4_phi0':           Bin('jetphi',          f'{jet0} {phi}',                          *bins['phi']),
+    'ak4_phi1':           Bin('jetphi',          f'{jet1} {phi}',                          *bins['phi']),
+    'ak4_nef0':           Bin('frac',            f'{jet0} {nef}',                          *bins['frac']),
+    'ak4_nef1':           Bin('frac',            f'{jet1} {nef}',                          *bins['frac']),
+    'ak4_nhf0':           Bin('frac',            f'{jet0} {nhf}',                          *bins['frac']),
+    'ak4_nhf1':           Bin('frac',            f'{jet1} {nhf}',                          *bins['frac']),
+    'ak4_chf0':           Bin('frac',            f'{jet0} {chf}',                          *bins['frac']),
+    'ak4_chf1':           Bin('frac',            f'{jet1} {chf}',                          *bins['frac']),
+    'ak4_central_eta':    Bin('jeteta',          f'More Central Jet {eta}',                *bins['eta']),
+    'ak4_forward_eta':    Bin('jeteta',          f'More Forward Jet {eta}',                *bins['eta']),
+    'extra_ak4_mult':     Bin('multiplicity',    f'Additional Jet Multiplicity',           *bins['mult']),
+    # 'dphitkpf':         Bin('dphi',            f'$\Delta\phi_{TK,PF}$',                  *bins['dphi']),
     'met':                Bin('met',             r'$p_{T}^{miss}$ (GeV)',                  *bins['met']),
     'met_phi':            Bin('phi',             r'$\phi_{MET}$',                          *bins['phi']),
     'calomet_pt':         Bin('met',             r'$p_{T,CALO}^{miss,no-\ell}$ (GeV)',     *bins['met']),
     'calomet_phi':        Bin('phi',             r'$\phi_{MET}^{CALO}$',                   *bins['phi']),
-    'ak4_mult':           Bin('multiplicity',    r'AK4 multiplicity',                      *bins['mult']),
-    'electron_pt':        Bin('pt',              r'Electron $p_{T}$ (GeV)',                *bins['lep_pt']),
-    'electron_pt0':       Bin('pt',              r'Leading electron $p_{T}$ (GeV)',        *bins['lep_pt']),
-    'electron_pt1':       Bin('pt',              r'Trailing electron $p_{T}$ (GeV)',       *bins['lep_pt']),
+    'ak4_mult':           Bin('multiplicity',    f'AK4 multiplicity',                      *bins['mult']),
+    'electron_pt':        Bin('pt',              f'Electron {pt}',                         *bins['lep_pt']),
+    'electron_pt0':       Bin('pt',              f'Leading Electron {pt}',                 *bins['lep_pt']),
+    'electron_pt1':       Bin('pt',              f'Trailing Electron {pt}',                *bins['lep_pt']),
     'electron_mt':        Bin('mt',              r'Electron $M_{T}$ (GeV)',                *bins['mt']),
-    'muon_pt':            Bin('pt',              r'Muon $p_{T}$ (GeV)',                    *bins['lep_pt']),
-    'muon_pt0':           Bin('pt',              r'Leading muon $p_{T}$ (GeV)',            *bins['lep_pt']),
-    'muon_pt1':           Bin('pt',              r'Trailing muon $p_{T}$ (GeV)',           *bins['lep_pt']),
+    'muon_pt':            Bin('pt',              f'Muon {pt}',                             *bins['lep_pt']),
+    'muon_pt0':           Bin('pt',              f'Leading Muon {pt}',                     *bins['lep_pt']),
+    'muon_pt1':           Bin('pt',              f'Trailing Muon {pt}',                    *bins['lep_pt']),
     'muon_mt':            Bin('mt',              r'Muon $M_{T}$ (GeV)',                    *bins['mt']),
-    'photon_pt0':         Bin('pt',              r'Photon $p_{T}$ (GeV)',                  *bins['photon_pt']),
-    'recoil':             Bin('recoil',          r'Recoil (GeV)',                          *bins['recoil']),
-    'dphijr':             Bin('dphi',            r'min $\Delta\phi(j,recoil)$',            *bins['dphi']),
+    'photon_pt0':         Bin('pt',              f'Photon {pt}',                           *bins['photon_pt']),
+    'recoil':             Bin('recoil',          f'Recoil (GeV)',                          *bins['recoil']),
+    'dphijr':             Bin('dphi',            f'min $\Delta\phi(j,recoil)$',            *bins['dphi']),
     'dimuon_mass':        Bin('dilepton_mass',   r'M($\mu^{+}\mu^{-}$)',                   *bins['dilepton_mass']),
     'dielectron_mass':    Bin('dilepton_mass',   r'M($e^{+}e^{-}$)',                       *bins['dilepton_mass']),
-    'mjj_transformed':    Bin('transformed',     r'Rescaled $M_{jj}$',                     *bins['eta']),
+    'mjj_transformed':    Bin('transformed',     f'Rescaled {mjj}',                        *bins['eta']),
     'detajj_transformed': Bin('transformed',     r'Rescaled $\Delta\eta_{jj}$',            *bins['eta']),
     'dphijj_transformed': Bin('transformed',     r'Rescaled $\Delta\phi_{jj}$',            *bins['eta']),
 }
@@ -140,11 +152,11 @@
 
 legend_titles = {
     'sr_vbf' : 'VBF Signal Region',
-    'cr_1m_vbf' : r'VBF $1\mu$ Region',
-    'cr_2m_vbf' : r'VBF $2\mu$ Region',
-    'cr_1e_vbf' : r'VBF $1e$ Region',
-    'cr_2e_vbf' : r'VBF $2e$ Region',
-    'cr_g_vbf' : r'VBF $\gamma$ Region',
+    'cr_1m_vbf' : f'VBF $1\mu$ Region',
+    'cr_2m_vbf' : f'VBF $2\mu$ Region',
+    'cr_1e_vbf' : f'VBF $1e$ Region',
+    'cr_2e_vbf' : f'VBF $2e$ Region',
+    'cr_g_vbf' : f'VBF $\gamma$ Region',
 }
 
 colors = {
@@ -203,7 +215,7 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     elif distribution == 'dphitkpf':
         new_bins = [ibin.lo for ibin in h.identifiers('dphi') if ibin.lo < 2] + [3.5]
         
-        new_ax = Bin('dphi', r'$\Delta\phi_{TK,PF}$', new_bins)
+        new_ax = Bin('dphi', f'$\Delta\phi_{TK,PF}$', new_bins)
         h = h.rebin('dphi', new_ax)
 
     # This sorting messes up in SR for some reason
@@ -366,9 +378,9 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
         )
 
     xlabels = {
-        'mjj': r'$M_{jj} \ (GeV)$',
-        'ak4_eta0': r'Leading Jet $\eta$',
-        'ak4_eta1': r'Trailing Jet $\eta$',
+        'mjj': f'{mjj}',
+        'ak4_eta0': f'{jet0} {eta}',
+        'ak4_eta1': f'{jet1} {eta}',
     }
 
     if distribution in xlabels.keys():

From 5b9dc78013848cf625128d9f1304efeb9b9d3a84 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 13:41:51 +0100
Subject: [PATCH 30/43] uniform variables 2

---
 bucoffea/plot/plotter.py | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index e3681c0cf..9d3ef6b62 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -50,7 +50,7 @@
     'mult':          [10, -0.5, 9.5],
 }
 
-mjj = '$M_{jj} \ (GeV)$'
+mjj = '$M_{jj}$ (GeV)'
 pt  ='$p_{T}$ (GeV)'
 eta ='$\eta$'
 phi ='$\phi$'
@@ -152,11 +152,11 @@
 
 legend_titles = {
     'sr_vbf' : 'VBF Signal Region',
-    'cr_1m_vbf' : f'VBF $1\mu$ Region',
-    'cr_2m_vbf' : f'VBF $2\mu$ Region',
-    'cr_1e_vbf' : f'VBF $1e$ Region',
-    'cr_2e_vbf' : f'VBF $2e$ Region',
-    'cr_g_vbf' : f'VBF $\gamma$ Region',
+    'cr_1m_vbf' : r'VBF $1\mu$ Region',
+    'cr_2m_vbf' : r'VBF $2\mu$ Region',
+    'cr_1e_vbf' : r'VBF $1e$ Region',
+    'cr_2e_vbf' : r'VBF $2e$ Region',
+    'cr_g_vbf'  : r'VBF $\gamma$ Region',
 }
 
 colors = {
@@ -215,7 +215,7 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     elif distribution == 'dphitkpf':
         new_bins = [ibin.lo for ibin in h.identifiers('dphi') if ibin.lo < 2] + [3.5]
         
-        new_ax = Bin('dphi', f'$\Delta\phi_{TK,PF}$', new_bins)
+        new_ax = Bin('dphi', r'$\Delta\phi_{TK,PF}$', new_bins)
         h = h.rebin('dphi', new_ax)
 
     # This sorting messes up in SR for some reason
@@ -376,16 +376,6 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
             histtype='errorbar',
             **data_err_opts
         )
-
-    xlabels = {
-        'mjj': f'{mjj}',
-        'ak4_eta0': f'{jet0} {eta}',
-        'ak4_eta1': f'{jet1} {eta}',
-    }
-
-    if distribution in xlabels.keys():
-        ax.set_xlabel(xlabels[distribution])
-        rax.set_xlabel(xlabels[distribution])
     
     rax.set_ylabel('Data / MC')
     rax.set_ylim(0.5,1.5)

From f95886614072513248ca26cd0cd16923886899a5 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 17:35:28 +0100
Subject: [PATCH 31/43] add control regions

---
 bucoffea/config/vbfhinv.yaml         | 2 ++
 bucoffea/vbfhinv/definitions.py      | 9 ++++++++-
 bucoffea/vbfhinv/vbfhinvProcessor.py | 4 +++-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/bucoffea/config/vbfhinv.yaml b/bucoffea/config/vbfhinv.yaml
index 3dfcf614b..ea7e3ff06 100644
--- a/bucoffea/config/vbfhinv.yaml
+++ b/bucoffea/config/vbfhinv.yaml
@@ -8,6 +8,8 @@ default:
           mass: 200
           dphi: 1.5
           deta: 1
+          highdphi: 2
+          highdeta: 3
         cut_based:
           mass: 1300
           dphi: 1.5
diff --git a/bucoffea/vbfhinv/definitions.py b/bucoffea/vbfhinv/definitions.py
index d00903115..0e87e5de7 100644
--- a/bucoffea/vbfhinv/definitions.py
+++ b/bucoffea/vbfhinv/definitions.py
@@ -334,7 +334,14 @@ def append_items(original_list, items_to_add):
         regions['sr_vbf'].remove('eemitigation')
 
     if cfg.RUN.REGION_WITHOUT_DIJET_CUTS:
-        regions['sr_vbf_nodijetcut'] = remove_items(regions['sr_vbf'], ['mjj','detajj','dphijj'])
+        regions['sr_vbf_loose'] = remove_items(regions['sr_vbf'], ['mjj','detajj','dphijj'])
+        regions['sr_vbf_loose_dphi']       = append_items(regions['sr_vbf_loose'], ['dphijj'])
+        regions['sr_vbf_loose_deta']       = append_items(regions['sr_vbf_loose'], ['detajj'])
+        regions['sr_vbf_loose_dphi_deta']  = append_items(regions['sr_vbf_loose'], ['dphijj', 'detajj'])
+
+        regions['sr_vbf_highdphi']          = append_items(regions['sr_vbf_loose'], ['highdphijj'])
+        regions['sr_vbf_highdphi_mjj']      = append_items(regions['sr_vbf_highdphi'], ['mjj'])
+        regions['sr_vbf_highdphi_highdeta'] = append_items(regions['sr_vbf_highdphi'], ['highdetajj'])
 
     # SR without PU weights
     # regions['sr_vbf_no_pu'] = copy.deepcopy(regions['sr_vbf'])
diff --git a/bucoffea/vbfhinv/vbfhinvProcessor.py b/bucoffea/vbfhinv/vbfhinvProcessor.py
index e653a4073..86b219f10 100644
--- a/bucoffea/vbfhinv/vbfhinvProcessor.py
+++ b/bucoffea/vbfhinv/vbfhinvProcessor.py
@@ -199,7 +199,7 @@ def process(self, df):
         # Check out setup_candidates for filtering details
         met_pt, met_phi, ak4, bjets, muons, electrons, taus, photons = setup_candidates(df, cfg)
         # Set up ParticleNet
-        pfcands = load_pf_cands(df,[muons,electrons])
+        pfcands = load_pf_cands(df,[muons,electrons, taus, photons])
         session = load_particlenet_model(bucoffea_path("particlenet_models/model_ops12.onnx"))
 
         # Filtering ak4 jets according to pileup ID
@@ -371,6 +371,8 @@ def get_more_forward_jeteta(diak4):
         selection.add('mjj', df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
         selection.add('dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
         selection.add('detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)
+        selection.add('highdphijj', df['dphijj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.HIGHDPHI)
+        selection.add('highdetajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.HIGHDETA)
 
         # Reject events where the leading jet has momentum > 6.5 TeV
         selection.add('leadak4_clean', leadak4_clean.any())

From b6894d099d255d3b4088a3a2be3f571e5e2d29b4 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 22:57:39 +0100
Subject: [PATCH 32/43] add plotting functions

---
 bucoffea/plot/util.py | 90 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 83 insertions(+), 7 deletions(-)

diff --git a/bucoffea/plot/util.py b/bucoffea/plot/util.py
index 95033265d..c0b1dc76d 100644
--- a/bucoffea/plot/util.py
+++ b/bucoffea/plot/util.py
@@ -427,6 +427,12 @@ def load_xs(ulxs=True):
     xs.update(tmp)
     return xs
 
+def energy(year):  # Map a year or run tag (e.g. 2017, 'Run2') to its centre-of-mass energy label string.
+    year = str(year)  # lets ints and strings share the regex checks below
+    if re.search(r'Run2|(2016|2017|2018)', year): return '(13 TeV)'
+    if re.search(r'Run3|(2022|2023|2024)', year): return '(13.6 TeV)'
+    raise ValueError(f'Unexpected year for energy: {year}')  # neither pattern occurs anywhere in the string
+
 def lumi(year, mcscale=1):
     """Golden JSON luminosity per for given year
 
@@ -435,12 +441,11 @@ def lumi(year, mcscale=1):
     :return: Golden JSON luminosity for that year in pb (!)
     :rtype: float
     """
-    if year==2018:
-        return 59.7 * mcscale
-    if year==2017:
-        return 41.5 * mcscale
-    if year==2016:
-        return 35.9 * mcscale
+    if year=='Run2': lumi = 138
+    elif year==2016: lumi = 35.9
+    elif year==2017: lumi = 41.5
+    elif year==2018: lumi = 59.7
+    return lumi*mcscale
 
 def scale_xs_lumi(histogram, mcscale=1, scale_lumi=True, ulxs=True):
     """MC normalization so that it's ready to compare to data
@@ -480,6 +485,8 @@ def fig_ratio():
     :rtype: tuple(Figure, axes, axes)
     """
     fig, (ax, rax) = plt.subplots(2, 1, figsize=(7,7), gridspec_kw={"height_ratios": (3, 1)}, sharex=True)
+    ax_cosmetics(ax)
+    ax_cosmetics(rax)
     return fig, ax, rax
 
 def fig_double_ratio():
@@ -554,4 +561,73 @@ def dump_info(args,outdir):
         f.write('Command line arguments:\n\n')
         cli = vars(args)
         for arg, val in cli.items():
-            f.write(f'{arg}: {val}\n')
\ No newline at end of file
+            f.write(f'{arg}: {val}\n')
+
+
+def create_legend(ax, legend_title, legend_labels=None, colors=None):
+    # Relabel/restyle the legend entries of ax by regex-matching their current labels, then draw a 2-column legend.
+    handles, labels = ax.get_legend_handles_labels()
+    for handle, label in zip(handles, labels):
+        if legend_labels is not None:
+            for datasetregex, new_label in legend_labels.items():  # no break here: the last matching pattern wins
+                if re.match(datasetregex, label):
+                    handle.set_label(new_label)
+        if colors is not None:
+            for k, col in colors.items():
+                if re.match(k, label):
+                    handle.set_color(col)
+                    handle.set_linestyle('-')
+                    handle.set_edgecolor('k')
+                    break  # only the first matching color pattern is applied
+    ax.legend(title=legend_title, handles=handles, ncol=2)
+
+
+def calculate_data_mc_ratio(sumw_data, sumw2_data, sumw_mc):  # Return (ratio, ratio error) for data over MC, bin by bin.
+    from coffea.hist import poisson_interval
+    r = sumw_data / sumw_mc
+    rerr = np.abs(poisson_interval(r, sumw2_data / sumw_mc**2) - r)  # distance from r to what poisson_interval returns (presumably interval bounds - confirm)
+    r[np.isnan(r) | np.isinf(r)] = 0.  # non-finite ratios (e.g. from sumw_mc == 0) are set to zero in place
+    rerr[np.isnan(rerr) | np.isinf(rerr)] = 0.  # same clean-up for the errors
+    return (r, rerr)
+
+
+def ax_cosmetics(ax):  # Apply the common tick styling to a matplotlib axes.
+    ax.tick_params(axis='both', direction='in', which='both')  # major and minor ticks point inward on x and y
+    ax.xaxis.set_ticks_position('both')  # x ticks on bottom and top
+    ax.yaxis.set_ticks_position('both')  # y ticks on left and right
+
+def ratio_cosmetics(ax, yaxis='Data / MC'):  # Style a ratio panel: y label, fixed y range, tick spacing, grid and unity line.
+    from matplotlib.ticker import MultipleLocator
+    ax.set_ylabel(yaxis)
+    ax.set_ylim(0.5,1.5)
+    ax.yaxis.set_major_locator(MultipleLocator(0.2))  # major ticks every 0.2
+    ax.yaxis.set_minor_locator(MultipleLocator(0.1))  # minor ticks every 0.1
+    ax.grid(axis='y',which='both',linestyle='--')
+    ax.axhline(1., xmin=0, xmax=1, color=(0,0,0,0.4), ls='--')  # semi-transparent dashed reference line at ratio = 1
+
+
+def set_cms_text(ax, text='$\\bf{CMS}$ internal'):  # Write the label text just above the top-left corner of ax.
+    ax.text(0., 1., text,
+            fontsize=14,
+            horizontalalignment='left',
+            verticalalignment='bottom',  # the text's bottom edge sits at y = 1, i.e. on top of the axes
+            transform=ax.transAxes  # (0, 1) is given in axes-fraction coordinates
+            )
+
+def set_lumi_text(ax, year=None, mcscale=1, extratext='VBF', size=None):
+    """Write '<extratext>, <lumi> fb^-1 <energy>' above the top-right corner of ax; size overrides the 14 pt font."""
+    if year is None: year ='Run2'
+    text = f'{str(lumi(year, mcscale))} fb$^{{-1}}$ {energy(year)}'
+    if extratext!='' and extratext is not None:
+        text = extratext+', '+text
+    # fontsize and size are matplotlib aliases, so exactly one of them is passed (NOTE(review): size is in points).
+    ax.text(1., 1., text,
+            fontsize=14 if size is None else size,
+            horizontalalignment='right',
+            verticalalignment='bottom',
+            transform=ax.transAxes)
+
+def set_cms_style(ax, text='$\\bf{CMS}$ internal', year=None, mcscale=1, extratext='VBF', size=None):  # Tick styling plus left label and right lumi header.
+    ax_cosmetics(ax)
+    set_cms_text(ax, text=text)
+    set_lumi_text(ax, year=year, mcscale=mcscale, extratext=extratext, size=size)  # year=None is turned into 'Run2' there
\ No newline at end of file

From c7d36bb05c4cac6d5aefee6b33ab866ee82df254 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 23 Mar 2023 22:59:41 +0100
Subject: [PATCH 33/43] simplify plotting macros

---
 bucoffea/plot/plotter.py                      | 141 +++++-------------
 .../stack_plot/get_hf_noise_estimate.py       |  68 ++-------
 2 files changed, 54 insertions(+), 155 deletions(-)

diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index 9d3ef6b62..b8e0c720a 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -1,21 +1,12 @@
 #!/usr/bin/env python
-import argparse
-import os
-import re
-import sys
-import uproot
+import os, re, uproot
 import numpy as np
 import mplhep as hep
 
-from collections import OrderedDict
 from matplotlib import pyplot as plt
-from matplotlib.ticker import MultipleLocator
 from coffea import hist
 from coffea.hist import poisson_interval
-from bucoffea.plot.util import merge_datasets, merge_extensions, scale_xs_lumi, fig_ratio, lumi
-from bucoffea.helpers.paths import bucoffea_path
-from klepto.archives import dir_archive
-from pprint import pprint
+from bucoffea.plot.util import merge_datasets, merge_extensions, scale_xs_lumi, fig_ratio, create_legend, calculate_data_mc_ratio, set_cms_style, ratio_cosmetics
 
 pjoin = os.path.join
 
@@ -120,20 +111,24 @@
     'particlenet_score' : (1e-1,1e5),
 }
 
+gjet = '$\\gamma$+jets'
+Zll  = 'Z$\\rightarrow\\ell\\ell$'
+Znn  = 'Z$\\rightarrow\\nu\\nu$'
+Wln  = 'W$\\rightarrow\\ell\\nu$'
 legend_labels = {
-    'GJets_(DR-0p4).*' : "QCD $\\gamma$+jets",
-    '(VBFGamma|GJets_SM.*EWK).*' : "EWK $\\gamma$+jets",
-    'DY.*' : "QCD Z$\\rightarrow\\ell\\ell$",
-    'EWKZ.*ZToLL.*' : "EWK Z$\\rightarrow\\ell\\ell$",
-    'WN*J.*LNu.*' : "QCD W$\\rightarrow\\ell\\nu$",
-    'EWKW.*LNu.*' : "EWK W$\\rightarrow\\ell\\nu$",
-    'ZN*JetsToNuNu.*.*' : "QCD Z$\\rightarrow\\nu\\nu$",
-    'EWKZ.*ZToNuNu.*' : "EWK Z$\\rightarrow\\nu\\nu$",
-    'QCD.*' : "QCD Estimation",
-    'Top.*' : "Top quark",
-    'Diboson.*' : "WW/WZ/ZZ",
-    'MET|Single(Electron|Photon|Muon)|EGamma.*' : "Data",
-    'VBF_HToInv.*' : "VBF H(inv)",
+    'GJets_(DR-0p4).*':                          f'QCD {gjet}',
+    '(VBFGamma|GJets_SM.*EWK).*':                f'EWK {gjet}',
+    'DY.*':                                      f'QCD {Zll}',
+    'EWKZ.*ZToLL.*':                             f'EWK {Zll}',
+    'WN*J.*LNu.*':                               f'QCD {Wln}',
+    'EWKW.*LNu.*':                               f'EWK {Wln}',
+    'ZN*JetsToNuNu.*.*':                         f'QCD {Znn}',
+    'EWKZ.*ZToNuNu.*':                           f'EWK {Znn}',
+    'QCD.*':                                     "QCD Estimation",
+    'Top.*':                                     "Top quark",
+    'Diboson.*':                                 "WW/WZ/ZZ",
+    'MET|Single(Electron|Photon|Muon)|EGamma.*': "Data",
+    'VBF_HToInv.*':                              "VBF H(inv)",
 }
 
 legend_labels_IC = {
@@ -151,7 +146,7 @@
 }
 
 legend_titles = {
-    'sr_vbf' : 'VBF Signal Region',
+    'sr_vbf'    : 'VBF Signal Region',
     'cr_1m_vbf' : r'VBF $1\mu$ Region',
     'cr_2m_vbf' : r'VBF $2\mu$ Region',
     'cr_1e_vbf' : r'VBF $1e$ Region',
@@ -205,16 +200,14 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     scale_xs_lumi(h, ulxs=ulxs, mcscale=mcscale)
     h = merge_datasets(h)
 
-    # Rebin the histogram if necessary
+    # Rebin if necessary.
+    # The dphitkpf distribution is rebinned separately: the bins in its tails are merged.
+    # Annoying approach but it works (due to float precision problems).
     if distribution in binnings.keys():
         new_ax = binnings[distribution]
         h = h.rebin(new_ax.name, new_ax)
-
-    # Specifically rebin dphitkpf distribution: Merge the bins in the tails
-    # Annoying approach but it works (due to float precision problems)
     elif distribution == 'dphitkpf':
         new_bins = [ibin.lo for ibin in h.identifiers('dphi') if ibin.lo < 2] + [3.5]
-        
         new_ax = Bin('dphi', r'$\Delta\phi_{TK,PF}$', new_bins)
         h = h.rebin('dphi', new_ax)
 
@@ -226,20 +219,6 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
 
     h_data = h.integrate('region', data_region)
     h_mc = h.integrate('region', mc_region)
-    
-    # Get the QCD template (HF-noise estimation), only to be used in the signal region
-    if 'sr_vbf' in data_region:
-        # If a path to HF-noise estimate file has been given, use it!
-        # Otherwise, take the one from the relevant output directory
-        if qcd_file:
-            qcdfilepath = qcd_file
-        else:
-            qcdfilepath = pjoin(outtag,'hf_estimate','vbfhinv_hf_estimate.root')
-        
-        # Make sure that the HF-noise estimate ROOT file points to a valid path
-        assert os.path.exists(qcdfilepath), f"HF-noise file cannot be found: {qcdfilepath}"
-
-        h_qcd = uproot.open(qcdfilepath)[f'hf_estimate_{distribution}_{year}']
 
     data_err_opts = {
         'linestyle':'none',
@@ -258,12 +237,18 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     }
 
     for dataset in datasets:
-        sumw = h_mc.integrate('dataset', dataset).values(overflow=overflow)[()]
-
+        sumw = h_mc.integrate('dataset', dataset).values(overflow=overflow)[()]        
         plot_info['sumw'].append(sumw)
 
-    # Add the HF-noise contribution (for signal region only)
-    if data_region == 'sr_vbf':
+    # Get the QCD template (HF-noise estimation), only to be used in the signal region
+    if 'sr_vbf' in data_region:
+        qcdfilepath = pjoin(outtag,'hf_estimate','vbfhinv_hf_estimate.root')
+        if qcd_file:
+            qcdfilepath = qcd_file
+        assert os.path.exists(qcdfilepath), f"HF-noise file cannot be found: {qcdfilepath}"
+        h_qcd = uproot.open(qcdfilepath)[f'hf_estimate_{distribution}_{year}']
+        # Add the HF-noise contribution (for signal region only)
+        
         plot_info['label'].insert(6, 'HF Noise Estimate')
         plot_info['sumw'].insert(6, h_qcd.values * mcscale)
 
@@ -325,29 +310,8 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     if distribution == 'mjj':
         ax.set_xlim(left=0.)
 
-    ax.yaxis.set_ticks_position('both')
-
-    # Update legend labels and plot styles
-    handles, labels = ax.get_legend_handles_labels()
-    for handle, label in zip(handles, labels):
-        for datasetregex, new_label in legend_labels.items():
-            col = None
-            if re.match(datasetregex, label):
-                handle.set_label(new_label)
-            for k, v in colors.items():
-                if re.match(k, label):
-                    col = v
-                    break
-
-            if col:
-                handle.set_color(col)
-                handle.set_linestyle('-')
-                handle.set_edgecolor('k')
-
-    try:
-        ax.legend(title=legend_titles[data_region], handles=handles, ncol=2)
-    except KeyError:
-        ax.legend(handles=handles, ncol=2)
+    create_legend(ax, legend_titles.get(data_region, None), legend_labels, colors)
+    set_cms_style(ax, year=year, mcscale=mcscale)
 
     # Plot ratio
     h_data = h_data.integrate('dataset', data)
@@ -357,14 +321,10 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     sumw_mc = h_mc.values(overflow=overflow)[()]
     
     # Add the HF-noise contribution to the background expectation
-    if data_region == 'sr_vbf':
+    if 'sr_vbf' in data_region:
         sumw_mc = sumw_mc + h_qcd.values * mcscale
 
-    r = sumw_data / sumw_mc
-    rerr = np.abs(poisson_interval(r, sumw2_data / sumw_mc**2) - r)
-
-    r[np.isnan(r) | np.isinf(r)] = 0.
-    rerr[np.isnan(rerr) | np.isinf(rerr)] = 0.
+    r, rerr = calculate_data_mc_ratio(sumw_data, sumw2_data, sumw_mc)
 
     # Actually do the plot if we're not blinded (only for SR)
     if not ('sr_vbf' in data_region and is_blind):
@@ -376,15 +336,6 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
             histtype='errorbar',
             **data_err_opts
         )
-    
-    rax.set_ylabel('Data / MC')
-    rax.set_ylim(0.5,1.5)
-    loc1 = MultipleLocator(0.2)
-    loc2 = MultipleLocator(0.1)
-    rax.yaxis.set_major_locator(loc1)
-    rax.yaxis.set_minor_locator(loc2)
-
-    rax.yaxis.set_ticks_position('both')
 
     sumw_denom, sumw2_denom = h_mc.values(overflow=overflow, sumw2=True)[()]
 
@@ -426,23 +377,7 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
 
         rax.legend()
 
-    rax.grid(axis='y',which='both',linestyle='--')
-
-    rax.axhline(1., xmin=0, xmax=1, color=(0,0,0,0.4), ls='--')
-
-    fig.text(0., 1., '$\\bf{CMS}$ internal',
-                fontsize=14,
-                horizontalalignment='left',
-                verticalalignment='bottom',
-                transform=ax.transAxes
-               )
-
-    fig.text(1., 1., f'VBF, {lumi(year, mcscale):.1f} fb$^{{-1}}$ ({year})',
-                fontsize=14,
-                horizontalalignment='right',
-                verticalalignment='bottom',
-                transform=ax.transAxes
-               )
+    ratio_cosmetics(ax=rax)
 
     outdir = pjoin(outtag,data_region)
     os.system('mkdir -p '+outdir)
diff --git a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
index 6f66e69e8..0273e78dd 100755
--- a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
+++ b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
@@ -9,12 +9,12 @@
 from datetime import datetime
 
 from matplotlib import pyplot as plt
-from bucoffea.plot.util import merge_extensions, merge_datasets, scale_xs_lumi, dump_info
+from bucoffea.plot.util import merge_extensions, merge_datasets, scale_xs_lumi, dump_info, create_legend, set_cms_style
 from coffea import hist
 from klepto.archives import dir_archive
 from pprint import pprint
 from distributions import distributions
-from bucoffea.plot.plotter import binnings, legend_labels
+from bucoffea.plot.plotter import binnings, legend_labels, colors
 
 pjoin = os.path.join
 
@@ -25,23 +25,25 @@ def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2
     acc.load(distribution)
     h = acc[distribution]
 
-    h = merge_extensions(h, acc, reweight_pu=False)
-    scale_xs_lumi(h)
-    h = merge_datasets(h)
-
+    # Set up overflow bin for mjj
     overflow = 'none'
     if distribution == 'mjj':
         overflow = 'over'
 
-    # Rebin if neccessary
+    # Pre-processing of the histogram, merging datasets, scaling w.r.t. XS and lumi
+    h = merge_extensions(h, acc, reweight_pu=False)
+    scale_xs_lumi(h)
+    h = merge_datasets(h)
+
+    # Rebin if necessary.
+    # The dphitkpf distribution is rebinned separately: the bins in its tails are merged.
+    # Annoying approach but it works (due to float precision problems).
     if distribution in binnings.keys():
         new_ax = binnings[distribution]
         h = h.rebin(new_ax.name, new_ax)
-    
     elif distribution == 'dphitkpf':
         new_bins = [ibin.lo for ibin in h.identifiers('dphi') if ibin.lo < 2] + [3.5]
-        
-        new_ax = hist.Bin('dphi', r'$\Delta\phi_{TK,PF}$', new_bins)
+        new_ax = Bin('dphi', r'$\Delta\phi_{TK,PF}$', new_bins)
         h = h.rebin('dphi', new_ax)
 
     # Get data and MC yields in the QCD CR
@@ -63,6 +65,7 @@ def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2
         }
 
         fig, ax = plt.subplots()
+
         hist.plot1d(
             h[data], 
             overlay='dataset', 
@@ -81,34 +84,9 @@ def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2
 
         ax.set_yscale('log')
         ax.set_ylim(1e-2,1e6)
-        ax.yaxis.set_ticks_position('both')
-
-        handles, labels = ax.get_legend_handles_labels()
-
-        for handle, label in zip(handles, labels):
-            for regex, newlabel in legend_labels.items():
-                if re.match(regex, label):
-                    handle.set_label(newlabel)
-                    if newlabel != 'Data':
-                        handle.set_linestyle('-')
-                        handle.set_edgecolor('k')
-                    continue
-
-        ax.legend(title='QCD CR', ncol=2, handles=handles)
-
-        ax.text(0.,1.,r'QCD CR $\times$ $CR \rightarrow SR$ TF',
-            fontsize=14,
-            ha='left',
-            va='bottom',
-            transform=ax.transAxes
-        )
 
-        ax.text(1.,1.,year,
-            fontsize=14,
-            ha='right',
-            va='bottom',
-            transform=ax.transAxes
-        )
+        create_legend(ax, legend_title='QCD CR', legend_labels=legend_labels, colors=colors)
+        set_cms_style(ax, year=year, extratext=r'QCD CR $\times$ $CR \rightarrow SR$ TF', size = 0.75)
 
         outpath = pjoin(outdir, f'{region_name}_{distribution}_{year}.pdf')
         fig.savefig(outpath)
@@ -121,26 +99,12 @@ def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2
         h_mc = h.integrate('dataset', mc)        
         h_mc.scale(-1)
         h_qcd.add(h_mc)
-
         hist.plot1d(h_qcd, ax=ax, overflow=overflow)
         ax.set_yscale('log')
         ax.set_ylim(1e-2,1e6)
         ax.get_legend().remove()
+        set_cms_style(ax, year=year, extratext='QCD Estimate in SR')
         
-        ax.text(0.,1.,'QCD Estimate in SR',
-            fontsize=14,
-            ha='left',
-            va='bottom',
-            transform=ax.transAxes
-        )
-
-        ax.text(1.,1.,year,
-            fontsize=14,
-            ha='right',
-            va='bottom',
-            transform=ax.transAxes
-        )
-
         outpath = pjoin(outdir, f'qcd_estimation_{distribution}_{year}.pdf')
         fig.savefig(outpath)
         plt.close(fig)

From b30d19786e4ca5ba601d529e14da382d794373ca Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Mon, 27 Mar 2023 10:01:02 +0200
Subject: [PATCH 34/43] add disk request for crab

---
 bucoffea/execute/buexec | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bucoffea/execute/buexec b/bucoffea/execute/buexec
index f9df23773..996028b10 100755
--- a/bucoffea/execute/buexec
+++ b/bucoffea/execute/buexec
@@ -275,6 +275,7 @@ def do_submit(args):
                 # "log" :f"/dev/null",
                 "request_cpus" : str(args.jobs),
                 "request_memory" : str(args.memory if args.memory else args.jobs*2100),
+                "request_disk": str(args.disk*1024*1024),
                 "+MaxRuntime" : f"{60*60*24}",
                 "on_exit_remove" : "((ExitBySignal == False) && (ExitCode == 0)) || (NumJobStarts >= 2)",
                 }
@@ -345,6 +346,7 @@ def main():
     parser_submit.add_argument('--async', action="store_true", default=False, help='Deprecated. Use --asynchronous instead.')
     parser_submit.add_argument('--debug', action="store_true", default=False, help='Print debugging info.')
     parser_submit.add_argument('--memory',type=int, default=None, help='Memory to request (in MB). Default is 2100 * number of cores.')
+    parser_submit.add_argument('--disk',type=int, default=2, help='Disk to request (in GB). Default is 2.')
     parser_submit.set_defaults(func=do_submit)
 
     args = parser.parse_args()

From d5426e1278de9c53869c596a067cc1bb956e1625 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Mon, 27 Mar 2023 10:18:17 +0200
Subject: [PATCH 35/43] bug fix

---
 bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
index 0273e78dd..c89c9a8f3 100755
--- a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
+++ b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
@@ -17,6 +17,7 @@
 from bucoffea.plot.plotter import binnings, legend_labels, colors
 
 pjoin = os.path.join
+Bin = hist.Bin
 
 def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2018], region_name='cr_vbf_qcd'):
     '''

From 49ea8cd8fc687b8c13dd22621c469ab52684c15e Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Wed, 29 Mar 2023 11:35:49 +0200
Subject: [PATCH 36/43] add new shapes and regions

---
 bucoffea/execute/dataset_split.py             |  54 ++++-----
 bucoffea/plot/plotter.py                      |  21 ++--
 bucoffea/plot/shape_comparison.py             | 112 ++++++++++++++++++
 .../plot/studies/stack_plot/distributions.py  |  20 ++--
 .../plot/studies/stack_plot/plot_data_mc.py   |  26 ++--
 bucoffea/plot/util.py                         |  28 +++--
 bucoffea/vbfhinv/definitions.py               |   6 +-
 7 files changed, 203 insertions(+), 64 deletions(-)
 create mode 100644 bucoffea/plot/shape_comparison.py

diff --git a/bucoffea/execute/dataset_split.py b/bucoffea/execute/dataset_split.py
index 669103886..755667c52 100644
--- a/bucoffea/execute/dataset_split.py
+++ b/bucoffea/execute/dataset_split.py
@@ -1,33 +1,33 @@
 def get_files_per_job(dataset,time_per_job = 3):
     # nfile per hour of running time
     filesperjob_dict ={ 'GluGlu_HToInvisible': 1,
-                        'ttH_HToInvisible': 1, 
-                        'VBF_HToInvisible': 3, 
-                        'WminusH_WToQQ_HToInvisible': 8, 
-                        'WplusH_WToQQ_HToInvisible': 3, 
-                        'ZH_ZToQQ_HToInvisible': 3, 
-                        'DYJetsToLL_LHEFilterPtZ-0To50': 12, 
-                        'DYJetsToLL_LHEFilterPtZ-50To100': 10, 
-                        'DYJetsToLL_LHEFilterPtZ-100To250': 5, 
-                        'DYJetsToLL_LHEFilterPtZ-250To400': 1, 
-                        'DYJetsToLL_LHEFilterPtZ-400To650': 1, 
-                        'DYJetsToLL_LHEFilterPtZ-650ToInf': 1, 
-                        'EWKWMinus2Jets_WToLNu': 4, 
-                        'EWKWPlus2Jets_WToLNu': 3, 
-                        'EWKZ2Jets_ZToLL': 3, 
-                        'EWKZ2Jets_ZToNuNu': 4, 
-                        'GJets_DR-0p4': 30, 
-                        'VBFGamma_5f_DipoleRecoil-mg': 10, 
-                        'WJetsToLNu_Pt-100To250': 4, 
-                        'WJetsToLNu_Pt-250To400': 1, 
-                        'WJetsToLNu_Pt-400To600': 1, 
-                        'WJetsToLNu_Pt-600ToInf': 1, 
-                        'Z1JetsToNuNu': 2, 
-                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-50To150': 20, 
-                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-150To250': 1, 
-                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400': 1, 
-                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-400ToInf': 1, 
-                        'EGamma': 12, 
+                        'ttH_HToInvisible': 1,
+                        'VBF_HToInvisible': 3,
+                        'WminusH_WToQQ_HToInvisible': 8,
+                        'WplusH_WToQQ_HToInvisible': 3,
+                        'ZH_ZToQQ_HToInvisible': 3,
+                        'DYJetsToLL_LHEFilterPtZ-0To50': 12,
+                        'DYJetsToLL_LHEFilterPtZ-50To100': 10,
+                        'DYJetsToLL_LHEFilterPtZ-100To250': 5,
+                        'DYJetsToLL_LHEFilterPtZ-250To400': 1,
+                        'DYJetsToLL_LHEFilterPtZ-400To650': 1,
+                        'DYJetsToLL_LHEFilterPtZ-650ToInf': 1,
+                        'EWKWMinus2Jets_WToLNu': 4,
+                        'EWKWPlus2Jets_WToLNu': 3,
+                        'EWKZ2Jets_ZToLL': 3,
+                        'EWKZ2Jets_ZToNuNu': 4,
+                        'GJets_DR-0p4': 30,
+                        'VBFGamma_5f_DipoleRecoil-mg': 10,
+                        'WJetsToLNu_Pt-100To250': 4,
+                        'WJetsToLNu_Pt-250To400': 1,
+                        'WJetsToLNu_Pt-400To600': 1,
+                        'WJetsToLNu_Pt-600ToInf': 1,
+                        'Z1JetsToNuNu': 2,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-50To150': 20,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-150To250': 1,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400': 1,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-400ToInf': 1,
+                        'EGamma': 12,
                         'MET': 5,
                         }
     # Look up files per job
diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index b8e0c720a..dfd4044bb 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -152,6 +152,11 @@
     'cr_1e_vbf' : r'VBF $1e$ Region',
     'cr_2e_vbf' : r'VBF $2e$ Region',
     'cr_g_vbf'  : r'VBF $\gamma$ Region',
+    'sr_vbf_loose': 'VBF Loose Signal Region',
+    'sr_vbf_loose_dphi': r'VBF Loose Signal Region + $\Delta\phi$',
+    'sr_vbf_loose_dphi_deta': r'VBF Loose Signal Region + $\Delta\phi-\Delta\eta$',
+    'cr_vbf_highdphi': r'VBF large $\Delta\phi$ Region',
+    'cr_vbf_highdphi_highdeta': r'VBF large $\Delta\phi-\Delta\eta$ Region',
 }
 
 colors = {
@@ -183,6 +188,14 @@
     '.*HF (N|n)oise.*' : (174, 126, 230),
 }
 
+data_err_opts = {
+        'linestyle':'none',
+        'marker': '.',
+        'markersize': 10.,
+        'color':'k',
+        'elinewidth': 1,
+    }
+
 def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distribution='mjj', plot_signal=True, mcscale=1, binwnorm=None, fformats=['pdf'], qcd_file=None, jes_file=None, ulxs=True, is_blind=False):
     """
     Main plotter function to create a stack plot of data to background estimation (from MC).
@@ -220,14 +233,6 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     h_data = h.integrate('region', data_region)
     h_mc = h.integrate('region', mc_region)
 
-    data_err_opts = {
-        'linestyle':'none',
-        'marker': '.',
-        'markersize': 10.,
-        'color':'k',
-        'elinewidth': 1,
-    }
-
     # Build the MC stack
     datasets = list(map(str, h[mc].identifiers('dataset')))
 
diff --git a/bucoffea/plot/shape_comparison.py b/bucoffea/plot/shape_comparison.py
new file mode 100644
index 000000000..97ca52adf
--- /dev/null
+++ b/bucoffea/plot/shape_comparison.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+import os, re
+from klepto.archives import dir_archive
+import mplhep as hep
+from matplotlib import pyplot as plt
+import numpy as np
+from bucoffea.plot.plotter import binnings, legend_titles, legend_labels
+from bucoffea.plot.util import merge_datasets_and_scale, rebin, fig_ratio, create_legend, ratio_cosmetics, ratio_unc
+from coffea import hist
+from coffea.hist import poisson_interval
+
+pjoin = os.path.join
+
+colors = {
+    'sr_vbf_no_veto_all' : 'k',
+    'sr_vbf_nodijetcut' : '#31a354', #green
+    'cr_1m_vbf' : '#2e74db', #blue
+    'cr_1e_vbf' : '#e66b0e', #orange
+    'cr_2m_vbf' : '#6a51a3', #violet
+    'cr_2e_vbf' : '#ad020a', #red
+    'cr_g_vbf' : '#4c9ea8', #water
+    'sr_vbf_loose': '#3e4042',
+    'sr_vbf_loose_dphi': '#5b5c5e',
+    'sr_vbf_loose_deta': '#a7a9ab',
+    'sr_vbf_loose_dphi_deta': '#9ba3ab',
+    'sr_vbf_highdphi': '#ebbdb5',
+    'sr_vbf_highdphi_mjj': '#e6a79c',
+    'sr_vbf_highdphi_highdeta': '#db442a',
+}
+
+def get_regions(dataset):
+    """Return the regions (in declaration order) whose dataset pattern matches `dataset`.
+
+    The order matters: the caller falls back to the first returned region as reference.
+    """
+    # Pattern shared by sr_vbf_no_veto_all and every other sr_vbf_* entry
+    sr = r'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'
+    w_cr = r'(EWKW.*|EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt_FXFX.*|WJetsToLNu_Pt-FXFX.*|{data}_201*.*).*'
+    z_cr = r'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX|{data}_201*.*).*'
+    matching = {
+        'sr_vbf_no_veto_all': sr,
+        'cr_1e_vbf': w_cr.format(data='EGamma'), 'cr_1m_vbf': w_cr.format(data='MET'),
+        'cr_2m_vbf': z_cr.format(data='MET'), 'cr_2e_vbf': z_cr.format(data='EGamma'),
+        'cr_g_vbf': r'(GJets_DR-0p4.*|VBFGamma.*|QCD_data.*|EGamma_201*.*).*',
+        **{f'sr_vbf_{tag}': sr for tag in ('loose', 'loose_dphi', 'loose_deta', 'loose_dphi_deta',
+                                           'highdphi', 'highdphi_mjj', 'highdphi_highdeta')},
+    }
+    return [region for region, pattern in matching.items() if re.match(pattern, dataset)]
+
+def shape_comparison(inpath, distribution, year, region_ref_ = 'sr_vbf_no_veto_all'):
+    """For each dataset, overlay the unit-normalised `distribution` of its regions, with ratios to a reference region, and save a PDF."""
+    outdir = pjoin('./output/',inpath.replace('..','').replace('/',''),'shape_comparison_2')
+    os.makedirs(outdir, exist_ok=True)  # create the directory without going through a shell
+
+    acc = dir_archive(inpath)
+    acc.load('sumw')
+    acc.load('sumw2')
+    acc.load(distribution)
+
+    histograms = acc[distribution]
+    if distribution== 'particlenet_score':
+        histograms = histograms.integrate('score_type', 'VBF-like')
+    histograms = merge_datasets_and_scale(histograms, acc, reweight_pu=False, noscale=False)
+
+    #TODO sumw_pileup
+    histograms = rebin(histograms, distribution, binnings)
+    datasets = list(map(str, histograms.identifiers('dataset')))
+    for dataset in datasets:
+        regions = get_regions(dataset)
+        if len(regions)==0: continue
+        region_ref = region_ref_ if region_ref_ in regions else regions[0]  # fall back to the first matching region
+        dataset_label = [label for regex, label in legend_labels.items() if re.match(regex, dataset)][0]
+        if dataset_label=='Data':
+            dataset_label = dataset.replace('_'+year, '')  # label data by its dataset name without the year
+        dataset_name = dataset_label.replace(' ', '_').replace('/', '_').replace('+', '').replace('$', '').replace('\\', '').replace('rightarrow', '')
+        histogram = histograms.integrate('dataset', dataset)
+        print(dataset, regions)
+        histogram.scale({k[0]:1./histogram.values()[k].sum() for k in histogram.values()}, axis='region')  # unit area per region
+
+        fig, ax, rax = fig_ratio()
+        xedges = histogram[region_ref].axes()[1].edges()
+
+        den, den_err = histogram[region_ref].values(sumw2=True)[(region_ref,)]
+        den_err = np.sqrt(den_err)
+        for region in regions:
+            region_label = [label for regex, label in legend_titles.items() if re.match(regex, region)][0]
+            num, num_err = histogram[region].values(sumw2=True)[(region,)]
+            num_err = np.sqrt(num_err)
+            r = num/den
+            rerr = ratio_unc(den, num, den_err, num_err)
+            r[np.isnan(r) | np.isinf(r)] = 0.  # zero out non-finite ratio entries
+            rerr[np.isnan(rerr) | np.isinf(rerr)] = 0.
+            hep.histplot(num, xedges, yerr=num_err, ax=ax, histtype='errorbar', label=region_label, color=colors[region])
+            hep.histplot(r, xedges, yerr=rerr, ax=rax, histtype='errorbar', color=colors[region])
+
+        create_legend(ax, dataset_label, legend_titles)
+        ratio_cosmetics(ax=rax, yaxis ='ratio', ylims=(0.,2.0), ystep=0.5)
+        outpath = pjoin(outdir, f'{distribution}_{dataset_name}_{year}.pdf')
+        fig.savefig(outpath)
+        plt.close(fig)  # one figure per dataset: release it so figures do not pile up in memory
+
+
+def main():
+    """Run the shape comparison for the hard-coded 2018 'particlenet_score' input."""
+    shape_comparison(
+        inpath='../merged_files/PFNANO_V9_17Feb23_PostNanoTools_2',
+        distribution='particlenet_score', year='2018')
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/bucoffea/plot/studies/stack_plot/distributions.py b/bucoffea/plot/studies/stack_plot/distributions.py
index ba66cc05d..c7c12cd5b 100755
--- a/bucoffea/plot/studies/stack_plot/distributions.py
+++ b/bucoffea/plot/studies/stack_plot/distributions.py
@@ -15,14 +15,18 @@ def obj_variables(object_name, indices, vars, extravars=None):
 common_distributions = [ 'mjj', 'detajj', 'dphijj', 'recoil', 'dphijr', 'particlenet_score']
 common_distributions += obj_variables(object_name='ak4', indices=[0,1], vars=['eta','pt'])
 # common_distributions += obj_variables(object_name='ak4', indices=[''], vars=['central_eta','forward_eta'])
-
+jet_distributions = obj_variables(object_name='ak4',     indices=[0,1], vars=['nef','nhf','chf'])
 # Distributions to plot for each region
 distributions = {
-    'sr_vbf'    :         common_distributions + obj_variables(object_name='ak4',       indices=[0,1], vars=['nef','nhf','chf']),
-    'sr_vbf_nodijetcut' : common_distributions + obj_variables(object_name='ak4',       indices=[0,1], vars=['nef','nhf','chf']),
-    'cr_1m_vbf' :         common_distributions + obj_variables(object_name='muon',      indices=[],    vars=['pt', 'eta', 'phi'], extravars=['mt']),
-    'cr_1e_vbf' :         common_distributions + obj_variables(object_name='electron',  indices=[],    vars=['pt', 'eta', 'phi'], extravars=['mt']),
-    'cr_2m_vbf' :         common_distributions + obj_variables(object_name='muon',      indices=[0,1], vars=['pt', 'eta', 'phi']) + ['dimuon_mass'],
-    'cr_2e_vbf' :         common_distributions + obj_variables(object_name='electron',  indices=[0,1], vars=['pt', 'eta', 'phi']) + ['dielectron_mass'],
-    'cr_g_vbf'  :         common_distributions + obj_variables(object_name='photon',    indices=[0],   vars=['pt', 'eta', 'phi']),
+    'sr_vbf':                   common_distributions + jet_distributions,
+    'sr_vbf_loose':             common_distributions + jet_distributions,
+    'sr_vbf_loose_dphi':        common_distributions + jet_distributions,
+    'sr_vbf_loose_dphi_deta':   common_distributions + jet_distributions,
+    'cr_vbf_highdphi':          common_distributions + jet_distributions,
+    'cr_vbf_highdphi_highdeta': common_distributions + jet_distributions,
+    'cr_1m_vbf':                common_distributions + obj_variables(object_name='muon',      indices=[],    vars=['pt', 'eta', 'phi'], extravars=['mt']),
+    'cr_1e_vbf':                common_distributions + obj_variables(object_name='electron',  indices=[],    vars=['pt', 'eta', 'phi'], extravars=['mt']),
+    'cr_2m_vbf':                common_distributions + obj_variables(object_name='muon',      indices=[0,1], vars=['pt', 'eta', 'phi']) + ['dimuon_mass'],
+    'cr_2e_vbf':                common_distributions + obj_variables(object_name='electron',  indices=[0,1], vars=['pt', 'eta', 'phi']) + ['dielectron_mass'],
+    'cr_g_vbf' :                common_distributions + obj_variables(object_name='photon',    indices=[0],   vars=['pt', 'eta', 'phi']),
 }
\ No newline at end of file
diff --git a/bucoffea/plot/studies/stack_plot/plot_data_mc.py b/bucoffea/plot/studies/stack_plot/plot_data_mc.py
index df20f8320..901ca0db1 100755
--- a/bucoffea/plot/studies/stack_plot/plot_data_mc.py
+++ b/bucoffea/plot/studies/stack_plot/plot_data_mc.py
@@ -27,7 +27,11 @@ def make_plot(args):
     for year in args.years:
         data = {
             'sr_vbf' : f'MET_{year}',
-            'sr_vbf_nodijetcut' : f'MET_{year}',
+            'sr_vbf_loose' : f'MET_{year}',
+            'sr_vbf_loose_dphi' : f'MET_{year}',
+            'sr_vbf_loose_dphi_deta' : f'MET_{year}',
+            'cr_vbf_highdphi' : f'MET_{year}',
+            'cr_vbf_highdphi_highdeta' : f'MET_{year}',
             'cr_1m_vbf' : f'MET_{year}',
             'cr_2m_vbf' : f'MET_{year}',
             'cr_1e_vbf' : f'EGamma_{year}',
@@ -36,13 +40,19 @@ def make_plot(args):
         }
 
         mc = {
-            'sr_vbf_no_veto_all' : re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*).*{year}'),
-            'sr_vbf_nodijetcut' : re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*).*{year}'),
-            'cr_1m_vbf' : re.compile(f'(EWKW.*|EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt_FXFX.*|WJetsToLNu_Pt-FXFX.*).*{year}'),
-            'cr_1e_vbf' : re.compile(f'(EWKW.*|EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt_FXFX.*|WJetsToLNu_Pt-FXFX.*).*{year}'),
-            'cr_2m_vbf' : re.compile(f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX).*{year}'),
-            'cr_2e_vbf' : re.compile(f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX).*{year}'),
-            'cr_g_vbf' : re.compile(f'(GJets_DR-0p4.*|VBFGamma.*|QCD_data.*).*{year}'),
+            'sr_vbf_no_veto_all' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            'sr_vbf_loose' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            'sr_vbf_loose_dphi' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            'sr_vbf_loose_dphi_deta' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            'cr_vbf_highdphi' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            'cr_vbf_highdphi_highdeta' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            
+            # 'cr_1m_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*)(?!.*VBFGamma*)(?!.*GJets*)(?!.*WJetsToLNu*)(?!.*ZNJetsToNuNu*)(?!.*DYJetsToLL*)(?!.*EWKZ2Jets_ZToLL*)(?!.*EWKW2Jets_WToLNu*)(?!.*EWKZ2Jets_ZToNuNu*).*{year}'),
+            'cr_1m_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*)(?!.*EWKZ2Jets_ZToNuNu*).*{year}'),
+            'cr_1e_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            'cr_2m_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            'cr_2e_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
+            'cr_g_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
         }
 
         # Dictionary having the region -> scale factor mapping, by default this is an empty dict
diff --git a/bucoffea/plot/util.py b/bucoffea/plot/util.py
index c0b1dc76d..abcc23cc4 100644
--- a/bucoffea/plot/util.py
+++ b/bucoffea/plot/util.py
@@ -221,6 +221,9 @@ def merge_extensions(histogram, acc, reweight_pu=True, noscale=False):
     for key, value in to_remove:
         mapping[key].remove(value)
 
+    if reweight_pu:
+        if not 'sumw_pileup' in acc: acc.load('sumw_pileup')
+        if not 'nevents' in acc: acc.load('nevents')
     ### Sumw merging according to mapping
     for base, datasets in mapping.items():
         for d in datasets:
@@ -472,11 +475,18 @@ def scale_xs_lumi(histogram, mcscale=1, scale_lumi=True, ulxs=True):
     norm_dict = {mc : 1e3 * xs_map[mc] * (lumi(extract_year(mc), mcscale) if scale_lumi else 1) for mc in mcs}
     histogram.scale(norm_dict, axis='dataset')
 
-# def merge_and_norm(histogram, acc):
-#     histogram = merge_extensions(histogram, acc)
-#     scale_xs_lumi(histogram)
-#     histogram = merge_datasets(histogram)
-#     return histogram
+def merge_datasets_and_scale(histogram, acc, reweight_pu=True, noscale=False):
+    """Merge dataset extensions, scale by cross section and luminosity, then merge datasets."""
+    merged = merge_extensions(histogram, acc, reweight_pu=reweight_pu, noscale=noscale)
+    # scale_xs_lumi's result is not used: it scales `merged` in place
+    scale_xs_lumi(merged)
+    return merge_datasets(merged)
+
+def rebin(histogram, name, binnings):
+    """Rebin `histogram` onto `binnings[name]` when such an entry exists; otherwise return it unchanged."""
+    if name not in binnings:
+        return histogram
+    return histogram.rebin(binnings[name].name, binnings[name])
 
 def fig_ratio():
     """Shortcut to create figure with ratio and main panels
@@ -596,12 +606,12 @@ def ax_cosmetics(ax):
     ax.xaxis.set_ticks_position('both')
     ax.yaxis.set_ticks_position('both')
 
-def ratio_cosmetics(ax, yaxis='Data / MC'):
+def ratio_cosmetics(ax, yaxis='Data / MC', ylims=(0.5,1.5), ystep=None, ystep_minor=None):
     from matplotlib.ticker import MultipleLocator
     ax.set_ylabel(yaxis)
-    ax.set_ylim(0.5,1.5)
-    ax.yaxis.set_major_locator(MultipleLocator(0.2))
-    ax.yaxis.set_minor_locator(MultipleLocator(0.1))
+    ax.set_ylim(*ylims)
+    if ystep: ax.yaxis.set_major_locator(MultipleLocator(ystep))
+    if ystep_minor: ax.yaxis.set_minor_locator(MultipleLocator(ystep_minor))
     ax.grid(axis='y',which='both',linestyle='--')
     ax.axhline(1., xmin=0, xmax=1, color=(0,0,0,0.4), ls='--')
 
diff --git a/bucoffea/vbfhinv/definitions.py b/bucoffea/vbfhinv/definitions.py
index f2f0f4a70..29615ed7b 100644
--- a/bucoffea/vbfhinv/definitions.py
+++ b/bucoffea/vbfhinv/definitions.py
@@ -338,12 +338,10 @@ def append_items(original_list, items_to_add):
     if cfg.RUN.REGION_WITHOUT_DIJET_CUTS:
         regions['sr_vbf_loose'] = remove_items(regions['sr_vbf'], ['mjj','detajj','dphijj'])
         regions['sr_vbf_loose_dphi']       = append_items(regions['sr_vbf_loose'], ['dphijj'])
-        regions['sr_vbf_loose_deta']       = append_items(regions['sr_vbf_loose'], ['detajj'])
         regions['sr_vbf_loose_dphi_deta']  = append_items(regions['sr_vbf_loose'], ['dphijj', 'detajj'])
 
-        regions['sr_vbf_highdphi']          = append_items(regions['sr_vbf_loose'], ['highdphijj'])
-        regions['sr_vbf_highdphi_mjj']      = append_items(regions['sr_vbf_highdphi'], ['mjj'])
-        regions['sr_vbf_highdphi_highdeta'] = append_items(regions['sr_vbf_highdphi'], ['highdetajj'])
+        regions['cr_vbf_highdphi']          = append_items(regions['sr_vbf_loose'], ['highdphijj'])
+        regions['cr_vbf_highdphi_highdeta'] = append_items(regions['sr_vbf_loose'], ['highdphijj', 'highdetajj'])
 
     # SR without PU weights
     # regions['sr_vbf_no_pu'] = copy.deepcopy(regions['sr_vbf'])

From c11d4629e4c326a6616e6ad0447169e484717c98 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 30 Mar 2023 14:19:56 +0200
Subject: [PATCH 37/43] more conservative splitting

---
 bucoffea/execute/buexec                    |  3 +++
 bucoffea/execute/dataset_split.py          | 22 +++++++++++-----------
 bucoffea/execute/validate_dataset_split.py | 13 +++++++------
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/bucoffea/execute/buexec b/bucoffea/execute/buexec
index 996028b10..23e734b73 100755
--- a/bucoffea/execute/buexec
+++ b/bucoffea/execute/buexec
@@ -222,6 +222,9 @@ def do_submit(args):
                 filesperjob = args.filesperjob
             nchunk = math.ceil(len(files)/filesperjob)
             chunks = chunk_by_files(files, nchunk=int(nchunk))
+            if 'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400' == dataset:
+                # This is very annoying, but even 1 file is generally too large
+                chunks = chunk_by_events(files, chunksize='1000000', workers=8)
         else:
             chunks = chunk_by_events(files, chunksize=args.eventsperjob, workers=8)
         for ichunk, chunk in enumerate(chunks):
diff --git a/bucoffea/execute/dataset_split.py b/bucoffea/execute/dataset_split.py
index 755667c52..2615a3c23 100644
--- a/bucoffea/execute/dataset_split.py
+++ b/bucoffea/execute/dataset_split.py
@@ -2,12 +2,12 @@ def get_files_per_job(dataset,time_per_job = 3):
     # nfile per hour of running time
     filesperjob_dict ={ 'GluGlu_HToInvisible': 1,
                         'ttH_HToInvisible': 1,
-                        'VBF_HToInvisible': 3,
-                        'WminusH_WToQQ_HToInvisible': 8,
-                        'WplusH_WToQQ_HToInvisible': 3,
-                        'ZH_ZToQQ_HToInvisible': 3,
-                        'DYJetsToLL_LHEFilterPtZ-0To50': 12,
-                        'DYJetsToLL_LHEFilterPtZ-50To100': 10,
+                        'VBF_HToInvisible': 1,
+                        'WminusH_WToQQ_HToInvisible': 1,
+                        'WplusH_WToQQ_HToInvisible': 1,
+                        'ZH_ZToQQ_HToInvisible': 1,
+                        'DYJetsToLL_LHEFilterPtZ-0To50': 10,
+                        'DYJetsToLL_LHEFilterPtZ-50To100': 7,
                         'DYJetsToLL_LHEFilterPtZ-100To250': 5,
                         'DYJetsToLL_LHEFilterPtZ-250To400': 1,
                         'DYJetsToLL_LHEFilterPtZ-400To650': 1,
@@ -18,17 +18,17 @@ def get_files_per_job(dataset,time_per_job = 3):
                         'EWKZ2Jets_ZToNuNu': 4,
                         'GJets_DR-0p4': 30,
                         'VBFGamma_5f_DipoleRecoil-mg': 10,
-                        'WJetsToLNu_Pt-100To250': 4,
+                        'WJetsToLNu_Pt-100To250': 3,
                         'WJetsToLNu_Pt-250To400': 1,
                         'WJetsToLNu_Pt-400To600': 1,
                         'WJetsToLNu_Pt-600ToInf': 1,
-                        'Z1JetsToNuNu': 2,
-                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-50To150': 20,
+                        'Z1JetsToNuNu': 1,
+                        'Z2JetsToNuNu_M-50_LHEFilterPtZ-50To150': 15,
                         'Z2JetsToNuNu_M-50_LHEFilterPtZ-150To250': 1,
                         'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400': 1,
                         'Z2JetsToNuNu_M-50_LHEFilterPtZ-400ToInf': 1,
-                        'EGamma': 12,
-                        'MET': 5,
+                        'EGamma': 8,
+                        'MET': 3,
                         }
     # Look up files per job
     keys_contained = [key for key in filesperjob_dict.keys() if key in dataset]
diff --git a/bucoffea/execute/validate_dataset_split.py b/bucoffea/execute/validate_dataset_split.py
index 8b6e338ff..f3474f559 100644
--- a/bucoffea/execute/validate_dataset_split.py
+++ b/bucoffea/execute/validate_dataset_split.py
@@ -3,7 +3,6 @@
 
 submitted = []
 info = {}
-nsub = 0
 samples = ['GluGlu_HToInvisible',
             'ttH_HToInvisible',
             'VBF_HToInvisible',
@@ -54,25 +53,27 @@
     if len(temp)==0:
         print(sample, temp,times, glob.glob("submission/PFNANO_V9_17Feb23_PostNanoTools/files/input_*"+sample+"*txt"))
         continue
+    if len(times)>3:
+        times.remove(np.min(times)) # to be conservative
     info[sample]['times'] = np.average(times)
     info[sample]['err'] = np.std(times)
     info[sample]['min'] = np.min(times)
     info[sample]['max'] = np.max(times)
     info[sample]['exp'] = np.max(times)*info[sample]['nfiles']
-    info[sample]['split'] = max(1, info[sample]['exp']/(60),0)
+    info[sample]['split'] = max(1, info[sample]['exp']/(60))
     info[sample]['filesperjob'] = max(1, 1.*info[sample]['nfiles']/info[sample]['split'])
-    nsub += info[sample]['split']
     submitted += temp 
 
 all_files = glob.glob("/eos/cms/store/group/phys_higgs/vbfhiggs/PFNANO_V9_17Feb23_PostNanoTools/*/*/*/*/*root")
 print(f"Non analysed {len(list(set(all_files)-set(submitted)))} out of {len(all_files)} files")
 
-hours = 1
 nsub_tot = 0
 for sample in info.keys():
-    nsub = int(round(max(1,info[sample]['split']/hours),0))
+    n_files = int(round(info[sample]['filesperjob'],0))
+    nsub = int(round(info[sample]['nfiles']/n_files,0))
     nsub_tot += nsub
-    n_files = int(round(info[sample]['filesperjob']*hours,0))
+    if n_files==0:
+        print (sample,info[sample])
     print(f"Split by {n_files} file can be used for sample:{sample} to produce {nsub} jobs")
 print("nsub", nsub_tot)
 

From 18d3c5d54f960a35b5a48d5fa4bfe9bb04aba985 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 30 Mar 2023 14:51:32 +0200
Subject: [PATCH 38/43] bug fix: match Z2JetsToNuNu 250To400 dataset by substring, pass chunksize as int

---
 bucoffea/execute/buexec | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bucoffea/execute/buexec b/bucoffea/execute/buexec
index 23e734b73..ab4c3be6a 100755
--- a/bucoffea/execute/buexec
+++ b/bucoffea/execute/buexec
@@ -222,9 +222,9 @@ def do_submit(args):
                 filesperjob = args.filesperjob
             nchunk = math.ceil(len(files)/filesperjob)
             chunks = chunk_by_files(files, nchunk=int(nchunk))
-            if 'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400' == dataset:
+            if 'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400' in dataset:
                 # This is very annoying, but even 1 file is generally too large
-                chunks = chunk_by_events(files, chunksize='1000000', workers=8)
+                chunks = chunk_by_events(files, chunksize=1000000, workers=8)
         else:
             chunks = chunk_by_events(files, chunksize=args.eventsperjob, workers=8)
         for ichunk, chunk in enumerate(chunks):

From 4873ccb5a27c3453bde9438ddac9abdf5dcdc207 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Thu, 30 Mar 2023 15:12:53 +0200
Subject: [PATCH 39/43] bug fix: run Z2JetsToNuNu 250To400 with 1 file per job and larger memory/disk request

---
 bucoffea/execute/buexec           | 8 ++++----
 bucoffea/execute/dataset_split.py | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/bucoffea/execute/buexec b/bucoffea/execute/buexec
index ab4c3be6a..f1a440cc6 100755
--- a/bucoffea/execute/buexec
+++ b/bucoffea/execute/buexec
@@ -222,9 +222,6 @@ def do_submit(args):
                 filesperjob = args.filesperjob
             nchunk = math.ceil(len(files)/filesperjob)
             chunks = chunk_by_files(files, nchunk=int(nchunk))
-            if 'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400' in dataset:
-                # This is very annoying, but even 1 file is generally too large
-                chunks = chunk_by_events(files, chunksize=1000000, workers=8)
         else:
             chunks = chunk_by_events(files, chunksize=args.eventsperjob, workers=8)
         for ichunk, chunk in enumerate(chunks):
@@ -277,11 +274,14 @@ def do_submit(args):
                 "log" : f"{filedir}/log_{chunkname}.txt",
                 # "log" :f"/dev/null",
                 "request_cpus" : str(args.jobs),
-                "request_memory" : str(args.memory if args.memory else args.jobs*2100),
+                "request_memory" : str(args.memory if args.memory else args.jobs*2*1024),
                 "request_disk": str(args.disk*1024*1024),
                 "+MaxRuntime" : f"{60*60*24}",
                 "on_exit_remove" : "((ExitBySignal == False) && (ExitCode == 0)) || (NumJobStarts >= 2)",
                 }
+            if 'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400' in dataset:
+                submission_settings['request_memory'] = str(4*2*1024)
+                submission_settings['request_disk'] = str(8*1024*1024)
             if args.send_proxy:
                 submission_settings['Proxy_path'] = pjoin(proxydir,os.path.basename(proxy))
 
diff --git a/bucoffea/execute/dataset_split.py b/bucoffea/execute/dataset_split.py
index 2615a3c23..2d548af66 100644
--- a/bucoffea/execute/dataset_split.py
+++ b/bucoffea/execute/dataset_split.py
@@ -35,4 +35,6 @@ def get_files_per_job(dataset,time_per_job = 3):
     filesperjob = -1
     if len(keys_contained)==1:
         filesperjob = int(round(filesperjob_dict[keys_contained[0]] * time_per_job))
+        if 'Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400' in dataset:
+            filesperjob = 1
     return filesperjob
\ No newline at end of file

From 3ef1754a2a3d14075131dde6f45b896d9a419786 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Tue, 4 Apr 2023 20:05:13 +0200
Subject: [PATCH 40/43] small changes

---
 bucoffea/plot/plotter.py                      | 30 ++++++-----
 bucoffea/plot/shape_comparison.py             | 51 ++++++++++---------
 .../stack_plot/get_hf_noise_estimate.py       |  2 +-
 .../plot/studies/stack_plot/plot_data_mc.py   | 10 ++--
 bucoffea/plot/util.py                         |  2 +
 5 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/bucoffea/plot/plotter.py b/bucoffea/plot/plotter.py
index dfd4044bb..2688128b2 100755
--- a/bucoffea/plot/plotter.py
+++ b/bucoffea/plot/plotter.py
@@ -155,8 +155,8 @@
     'sr_vbf_loose': 'VBF Loose Signal Region',
     'sr_vbf_loose_dphi': r'VBF Loose Signal Region + $\Delta\phi$',
     'sr_vbf_loose_dphi_deta': r'VBF Loose Signal Region + $\Delta\phi-\Delta\eta$',
-    'cr_vbf_highdphi': r'VBF large $\Delta\phi$ Region', ,
-    'cr_vbf_highdphi_highdeta': r'VBF large $\Delta\phi-\Delta\eta$ Region', ,
+    'cr_vbf_highdphi': r'VBF large $\Delta\phi$ Region',
+    'cr_vbf_highdphi_highdeta': r'VBF large $\Delta\phi-\Delta\eta$ Region',
 }
 
 colors = {
@@ -237,12 +237,14 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
     datasets = list(map(str, h[mc].identifiers('dataset')))
 
     plot_info = {
-        'label' : datasets,
+        'label' : [],
         'sumw' : [],
     }
 
     for dataset in datasets:
-        sumw = h_mc.integrate('dataset', dataset).values(overflow=overflow)[()]        
+        sumw = h_mc.integrate('dataset', dataset).values(overflow=overflow)[()]
+        if sumw.sum()<=0: continue
+        plot_info['label'].append(dataset)
         plot_info['sumw'].append(sumw)
 
     # Get the QCD template (HF-noise estimation), only to be used in the signal region
@@ -285,16 +287,16 @@ def plot_data_mc(acc, outtag, year, data, mc, data_region, mc_region, distributi
 
         h_signal = h.integrate('region', mc_region)[signal]
         h_signal.scale(mcscale)
-
-        hist.plot1d(
-            h_signal,
-            ax=ax,
-            overlay='dataset',
-            overflow=overflow,
-            line_opts=signal_line_opts,
-            binwnorm=binwnorm,
-            clear=False
-        )
+        if h_signal.values(overflow=overflow)[()].sum():
+            hist.plot1d(
+                h_signal,
+                ax=ax,
+                overlay='dataset',
+                overflow=overflow,
+                line_opts=signal_line_opts,
+                binwnorm=binwnorm,
+                clear=False
+                )
 
     ax.set_yscale('log')
     if distribution == 'mjj':
diff --git a/bucoffea/plot/shape_comparison.py b/bucoffea/plot/shape_comparison.py
index 97ca52adf..cac46892d 100644
--- a/bucoffea/plot/shape_comparison.py
+++ b/bucoffea/plot/shape_comparison.py
@@ -8,9 +8,9 @@
 from bucoffea.plot.plotter import binnings, legend_titles, legend_labels
 from bucoffea.plot.util import merge_datasets_and_scale, rebin, fig_ratio, create_legend, ratio_cosmetics, ratio_unc
 from coffea import hist
-from coffea.hist import poisson_interval
 
 pjoin = os.path.join
+Bin = hist.Bin
 
 colors = {
     'sr_vbf_no_veto_all' : 'k',
@@ -20,13 +20,12 @@
     'cr_2m_vbf' : '#6a51a3', #violet
     'cr_2e_vbf' : '#ad020a', #red
     'cr_g_vbf' : '#4c9ea8', #water
-    'sr_vbf_loose': '#3e4042',
-    'sr_vbf_loose_dphi': '#5b5c5e',
-    'sr_vbf_loose_deta': '#a7a9ab',
-    'sr_vbf_loose_dphi_deta': '#9ba3ab',
-    'sr_vbf_highdphi': '#ebbdb5',
-    'sr_vbf_highdphi_mjj': '#e6a79c',
-    'sr_vbf_highdphi_highdeta': '#db442a',
+    'sr_vbf_loose_no_veto_all': '#b6c4d1',
+    'sr_vbf_loose_dphi_no_veto_all': '#86878a',
+    'sr_vbf_loose_deta_no_veto_all': '#5ad68c',
+    'sr_vbf_loose_dphi_deta_no_veto_all': '#834487',
+    'cr_vbf_highdphi': '#800080',
+    'cr_vbf_highdphi_highdeta': '#ffc107',
 }
 
 def get_regions(dataset):
@@ -38,20 +37,22 @@ def get_regions(dataset):
             'cr_2m_vbf' : re.compile(f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX|MET_201*.*).*'),
             'cr_2e_vbf' : re.compile(f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX|EGamma_201*.*).*'),
             'cr_g_vbf' : re.compile(f'(GJets_DR-0p4.*|VBFGamma.*|QCD_data.*|EGamma_201*.*).*'),
-            'sr_vbf_loose': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
-            'sr_vbf_loose_dphi': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
-            'sr_vbf_loose_deta': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
-            'sr_vbf_loose_dphi_deta': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
-            'sr_vbf_highdphi': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
-            'sr_vbf_highdphi_mjj': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
-            'sr_vbf_highdphi_highdeta': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
+            # 'sr_vbf_loose_no_veto_all': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
+            # 'sr_vbf_loose_dphi_no_veto_all': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
+            # 'sr_vbf_loose_dphi_deta_no_veto_all': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
+            'cr_vbf_highdphi': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
+            'cr_vbf_highdphi_highdeta': re.compile(f'(ZNJetsToNuNu_M-50_LHEFilterPtZ-FXFX.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL.*Pt.*FXFX.*|WJetsToLNu_Pt-FXFX.*|MET_201*.*).*'),
         }
     return [x for x in matching if re.match(matching[x], dataset)]
 
 def shape_comparison(inpath, distribution, year, region_ref_ = 'sr_vbf_no_veto_all'):
-    outdir = pjoin('./output/',inpath.replace('..','').replace('/',''),'shape_comparison_2')
+    outdir = pjoin('./output/',inpath.replace('..','').replace('/',''),'shape_comparison')
     os.system('mkdir -p '+outdir)
 
+    xlabel = distribution
+    if distribution=='mjj': xlabel = 'm{jj}'
+    if 'score' in distribution: xlabel = 'DNN score'
+
     acc = dir_archive(inpath)
     acc.load('sumw')
     acc.load('sumw2')
@@ -62,7 +63,6 @@ def shape_comparison(inpath, distribution, year, region_ref_ = 'sr_vbf_no_veto_a
         histograms = histograms.integrate('score_type', 'VBF-like')
     histograms = merge_datasets_and_scale(histograms, acc, reweight_pu=False, noscale=False)
     
-    #TODO sumw_pileup
     histograms = rebin(histograms, distribution, binnings)
     datasets = list(map(str, histograms.identifiers('dataset')))
     for dataset in datasets:
@@ -76,16 +76,16 @@ def shape_comparison(inpath, distribution, year, region_ref_ = 'sr_vbf_no_veto_a
             dataset_label = dataset.replace('_'+year, '')
         dataset_name = dataset_label.replace(' ', '_').replace('/', '_').replace('+', '').replace('$', '').replace('\\', '').replace('rightarrow', '')
         histogram = histograms.integrate('dataset', dataset)
-        print(dataset, regions)
         histogram.scale({k[0]:1./histogram.values()[k].sum() for k in histogram.values()}, axis='region')
-        
-        fig, ax, rax = fig_ratio()
+        histogram = histogram.rebin('score', Bin('score', f'DNN score', 25, 0, 1))
         xedges = histogram[region_ref].axes()[1].edges()
-
+        fig, ax, rax = fig_ratio()
+        ax.set_ylabel('A.U.')
+        ax.set_xlabel(xlabel)
         den, den_err = histogram[region_ref].values(sumw2=True)[(region_ref,)]
         den_err = np.sqrt(den_err)
         for region in regions:
-            region_label = [label for regex, label in legend_titles.items() if re.match(regex, region)][0]
+            region_label = [label for regex, label in legend_titles.items() if regex==region.replace('_no_veto_all','')][0]
             num, num_err = histogram[region].values(sumw2=True)[(region,)]
             num_err = np.sqrt(num_err)
             r = num/den
@@ -95,16 +95,17 @@ def shape_comparison(inpath, distribution, year, region_ref_ = 'sr_vbf_no_veto_a
             hep.histplot(num, xedges, yerr=num_err, ax=ax, histtype='errorbar', label=region_label, color=colors[region])
             hep.histplot(r, xedges, yerr=rerr, ax=rax, histtype='errorbar', color=colors[region])
         
-        create_legend(ax, dataset_label, legend_titles)
+        create_legend(ax, dataset_label)
         ratio_cosmetics(ax=rax, yaxis ='ratio', ylims=(0.,2.0), ystep=0.5)
         outpath = pjoin(outdir, f'{distribution}_{dataset_name}_{year}.pdf')
         fig.savefig(outpath)
+        plt.close()
 
 
 def main():
-    inpath = '../merged_files/PFNANO_V9_17Feb23_PostNanoTools_2'
-    distribution = 'particlenet_score'
+    inpath = '../merged_files/PFNANO_V9_17Feb23_PostNanoTools_latest'
     year = '2018'
+    distribution = 'particlenet_score'
     shape_comparison(inpath, distribution, year)
 
 
diff --git a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
index c89c9a8f3..b786b97d7 100755
--- a/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
+++ b/bucoffea/plot/studies/stack_plot/get_hf_noise_estimate.py
@@ -123,7 +123,7 @@ def get_hf_noise_estimate(acc, outdir, outrootfile, distribution, years=[2017, 2
 def commandline():
     parser = argparse.ArgumentParser()
     parser.add_argument('inpath', help='Path to the merged input accumulator.')
-    parser.add_argument('--years', nargs='*', type=int, default=[2017,2018], help='Years to run.')
+    parser.add_argument('--years', nargs='*', type=int, default=[2018], help='Years to run.')
     parser.add_argument('--region', default='cr_vbf_qcd', help='Name of the HF-noise enriched control region as defined in the VBF H(inv) processor.')
     parser.add_argument('--distribution', default='.*', help='Regex specifying the list of distributions to run.')
     args = parser.parse_args()
diff --git a/bucoffea/plot/studies/stack_plot/plot_data_mc.py b/bucoffea/plot/studies/stack_plot/plot_data_mc.py
index 901ca0db1..96f941a3f 100755
--- a/bucoffea/plot/studies/stack_plot/plot_data_mc.py
+++ b/bucoffea/plot/studies/stack_plot/plot_data_mc.py
@@ -46,9 +46,7 @@ def make_plot(args):
             'sr_vbf_loose_dphi_deta' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
             'cr_vbf_highdphi' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
             'cr_vbf_highdphi_highdeta' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
-            
-            # 'cr_1m_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*)(?!.*VBFGamma*)(?!.*GJets*)(?!.*WJetsToLNu*)(?!.*ZNJetsToNuNu*)(?!.*DYJetsToLL*)(?!.*EWKZ2Jets_ZToLL*)(?!.*EWKW2Jets_WToLNu*)(?!.*EWKZ2Jets_ZToNuNu*).*{year}'),
-            'cr_1m_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*)(?!.*EWKZ2Jets_ZToNuNu*).*{year}'),
+            'cr_1m_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
             'cr_1e_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
             'cr_2m_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
             'cr_2e_vbf' : re.compile(f'^(?!.*HToInv*)(?!.*MET*)(?!.*EGamma*)(?!.*QCD_data*)(?!.*M125*).*{year}'),
@@ -91,7 +89,7 @@ def make_plot(args):
                         mc_region=mc_region,
                         distribution=distribution,
                         mcscale=mcscale,
-                        plot_signal='sr_vbf' in data_region,
+                        # plot_signal='sr_vbf' in data_region,
                         jes_file='./jec/jes_uncs.root' if args.jes else None,
                         ulxs=not args.eoyxs,
                         fformats=args.fformats,
@@ -108,9 +106,9 @@ def commandline():
     parser.add_argument('inpath', type=str, help='Input folder to use.')
     parser.add_argument('--region', type=str, default='.*', help='Regex specifying the analysis regions to plot.')
     parser.add_argument('--distribution', type=str, default='.*', help='Regex specifying the distributions to plot.')
-    parser.add_argument('--years', type=int, nargs='*', default=[2017,2018], help='Years to run on.')
+    parser.add_argument('--years', type=int, nargs='*', default=[2018], help='Years to run on.')
     parser.add_argument('--one_fifth_unblind', action='store_true', help='1/5th unblinded data.')
-    parser.add_argument('--blind', action='store_true', help='blind data.')
+    parser.add_argument('--blind', default=True, action='store_true', help='blind data.')
     parser.add_argument('--fformats', nargs='*', default=['pdf'], help='Output file format for the plots, default is PDF only.')
     parser.add_argument('--jes', action='store_true', help='Plot JES+JER uncertainty bands.')
     parser.add_argument('--eoyxs', action='store_true', help='Use EOY XS for normalization, otherwise use UL XS.')
diff --git a/bucoffea/plot/util.py b/bucoffea/plot/util.py
index abcc23cc4..77b6bd55b 100644
--- a/bucoffea/plot/util.py
+++ b/bucoffea/plot/util.py
@@ -495,6 +495,7 @@ def fig_ratio():
     :rtype: tuple(Figure, axes, axes)
     """
     fig, (ax, rax) = plt.subplots(2, 1, figsize=(7,7), gridspec_kw={"height_ratios": (3, 1)}, sharex=True)
+    fig.patch.set_alpha(0)
     ax_cosmetics(ax)
     ax_cosmetics(rax)
     return fig, ax, rax
@@ -506,6 +507,7 @@ def fig_double_ratio():
     :rtype: tuple(Figure, axes, axes)
     """
     fig, (ax, rax1, rax2) = plt.subplots(3, 1, figsize=(7,7), gridspec_kw={"height_ratios": (2,1, 1)}, sharex=True)
+    fig.patch.set_alpha(0)
     return fig, ax, rax1, rax2
 
 

From 610a8d9dfe24d5600c3b7a45181a3cf868bb885b Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Mon, 22 May 2023 15:45:15 +0200
Subject: [PATCH 41/43] update submission

---
 bucoffea/execute/buexec           | 20 +++++++++++---------
 bucoffea/execute/dataset_split.py |  1 +
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/bucoffea/execute/buexec b/bucoffea/execute/buexec
index f1a440cc6..2393df3b1 100755
--- a/bucoffea/execute/buexec
+++ b/bucoffea/execute/buexec
@@ -92,7 +92,7 @@ def do_worker(args):
                                   processor_instance=choose_processor(args)(),
                                   executor=processor.futures_executor,
                                   executor_args={'workers': args.jobs, 'flatten': True},
-                                  chunksize=50000,
+                                  chunksize=int(args.eventsperjob),
                                  )
 
     # Save output
@@ -215,15 +215,15 @@ def do_submit(args):
 
     for dataset, files in dataset_files.items():
         print(f"Writing submission files for dataset: {dataset}.")
-
-        if args.filesperjob:
-            filesperjob = get_files_per_job(dataset,time_per_job = 3)
-            if filesperjob<0:
-                filesperjob = args.filesperjob
+        filesperjob = get_files_per_job(dataset,time_per_job = 3)
+        eventsperjob = int(args.eventsperjob)
+        if any([x in dataset for x in ['Z2JetsToNuNu_M-50_LHEFilterPtZ-250To400', 'G1Jet_LHEGpT-675ToInf']]):
+            eventsperjob = 1000
+        if args.filesperjob and filesperjob>0:
             nchunk = math.ceil(len(files)/filesperjob)
             chunks = chunk_by_files(files, nchunk=int(nchunk))
         else:
-            chunks = chunk_by_events(files, chunksize=args.eventsperjob, workers=8)
+            chunks = chunk_by_events(files, chunksize=eventsperjob, workers=8)
         for ichunk, chunk in enumerate(chunks):
             # Save input files to a txt file and send to job
             tmpfile = pjoin(subdir, filedir, f"input_{dataset}_{ichunk:03d}of{len(chunks):03d}.txt")
@@ -240,7 +240,8 @@ def do_submit(args):
                 'worker',
                 f'--dataset {dataset}',
                 f'--filelist {os.path.basename(tmpfile)}',
-                f'--chunk {ichunk}'
+                f'--chunk {ichunk}',
+                f'--eventsperjob {eventsperjob}'
             ]
 
             job_input_files = input_files + [
@@ -332,13 +333,14 @@ def main():
     parser_run.add_argument('--dataset', type=str, help='Dataset name to run over.')
     parser_run.add_argument('--filelist', type=str, help='Text file with file names to run over.')
     parser_run.add_argument('--chunk', type=str, help='Number of this chunk for book keeping.')
+    parser_run.add_argument('--eventsperjob', type=int, default=5*1e4, help='Number of events to process per job')
     parser_run.set_defaults(func=do_worker)
 
     # Arguments passed to the "submit" operation
     parser_submit = subparsers.add_parser('submit', help='Submission help')
     parser_submit.add_argument('--dataset', type=str, help='Dataset regex to use.')
     parser_submit.add_argument('--filesperjob', type=int, default=None, help='Number of files to process per job')
-    parser_submit.add_argument('--eventsperjob', type=int, default=1e6, help='Number of events to process per job')
+    parser_submit.add_argument('--eventsperjob', type=int, default=5*1e4, help='Number of events to process per job')
     parser_submit.add_argument('--name', type=str, default=None, help='Name to identify this submission')
     parser_submit.add_argument('--prefetch', action="store_true", default=False, help='Prefetch input files on worker but run over xrootd.')
     parser_submit.add_argument('--no-prefetch', action="store_true", default=False, help='DEPRECATED. Prefetching is now disabled by default. Use --prefetch to activate prefetching.')
diff --git a/bucoffea/execute/dataset_split.py b/bucoffea/execute/dataset_split.py
index 2d548af66..b38f5836a 100644
--- a/bucoffea/execute/dataset_split.py
+++ b/bucoffea/execute/dataset_split.py
@@ -17,6 +17,7 @@ def get_files_per_job(dataset,time_per_job = 3):
                         'EWKZ2Jets_ZToLL': 3,
                         'EWKZ2Jets_ZToNuNu': 4,
                         'GJets_DR-0p4': 30,
+                        'G1Jet': 30,
                         'VBFGamma_5f_DipoleRecoil-mg': 10,
                         'WJetsToLNu_Pt-100To250': 3,
                         'WJetsToLNu_Pt-250To400': 1,

From 605eac1871512c0d79018b73976afcb69c474c7d Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Mon, 22 May 2023 15:48:19 +0200
Subject: [PATCH 42/43] update creation of root file for limits

---
 bucoffea/limit/legacy_vbf.py | 33 ++++++++++++++++++---------------
 bucoffea/plot/util.py        | 23 ++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/bucoffea/limit/legacy_vbf.py b/bucoffea/limit/legacy_vbf.py
index 847ccdc6a..36907466b 100644
--- a/bucoffea/limit/legacy_vbf.py
+++ b/bucoffea/limit/legacy_vbf.py
@@ -14,7 +14,8 @@
 from bucoffea.plot.util import (
     merge_datasets, 
     merge_extensions, 
-    scale_xs_lumi, 
+    scale_xs_lumi,
+    rebin_particlenet_score,
     URTH1
     )
 
@@ -164,7 +165,9 @@ def mjj_bins_2016():
 
 def nn_score_ax() -> hist.Bin:
     """Returns the new binning for the neural network score."""
-    new_ax = hist.Bin("score", "Neural network score", 50, 0, 1)
+    # Variable binning: a single wide [0, 0.1] bin followed by 0.02-wide bins up to 1.
+    edges = [0, 0.1, 0.12, 0.14, 0.16, 0.18, 0.2, 0.22, 0.24, 0.26, 0.28, 0.30, 0.32, 0.34, 0.36, 0.38, 0.4, 0.42, 0.44, 0.46, 0.48, 0.5, 0.52, 0.54, 0.56, 0.58, 0.6, 0.62, 0.64, 0.66, 0.68, 0.7, 0.72, 0.74, 0.76, 0.78, 0.8, 0.82, 0.84, 0.86, 0.88, 0.9, 0.92, 0.94, 0.96, 0.98, 1.0]
+    new_ax = hist.Bin("score", "Neural network score", edges)
     return new_ax
 
 
@@ -191,7 +194,7 @@ def export_coffea_histogram(h, overflow='over', axname='score', suppress_last_bi
     return URTH1(edges=xedges, sumw=sumw, sumw2=sumw2)
 
 def legacy_limit_input_vbf(acc,
-    distribution='cnn_score',
+    distribution='particlenet_score',
     outdir='./output', 
     unblind=False, 
     years=[2017, 2018], 
@@ -227,6 +230,7 @@ def legacy_limit_input_vbf(acc,
     if distribution == "particlenet_score":
         newax = nn_score_ax()
         axname = 'score'
+        h = rebin_particlenet_score(h)
 
         # Integrate for the VBF-like score
         h = h.integrate("score_type", "VBF-like")
@@ -234,13 +238,12 @@ def legacy_limit_input_vbf(acc,
     elif distribution == 'mjj':
         newax = hist.Bin('mjj','$M_{jj}$ (GeV)', mjj_bins_2016())
         axname = 'mjj'
+        # Rebin the distribution
+        h = h.rebin(h.axis(newax.name), newax)
     
     else:
         raise RuntimeError(f'Limit input for VBF is not supported for distribution: {distribution}')
     
-    # Rebin the distribution
-    h = h.rebin(h.axis(newax.name), newax)
-    
     h = merge_extensions(h, acc)
     scale_xs_lumi(h)
     h = merge_datasets(h)
@@ -251,7 +254,7 @@ def legacy_limit_input_vbf(acc,
 
         with open(infofile, 'w+') as infof:
             # Output ROOT file we're going to save (per year)
-            f = uproot.recreate(pjoin(outdir, f'legacy_limit_vbf_{year}.root'))
+            f = uproot.recreate(pjoin(outdir, f'legacy_limit_vbf_{distribution}_{year}.root'))
             data, mc = datasets(year, include_sr_data=unblind or one_fifth_unblind)
 
             # Loop over regions and make histograms
@@ -305,24 +308,24 @@ def legacy_limit_input_vbf(acc,
     
     # Merge the 2017 and 2018 histograms into a single file
     # under separate sub-directories
-    merge_legacy_inputs(outdir)
+    merge_legacy_inputs(outdir, distribution)
 
-def merge_legacy_inputs(outdir):
+def merge_legacy_inputs(outdir, distribution):
     '''
     Workaround for uproot's lack of subdirectory support.
     '''
 
     files = defaultdict(dict)
     for fname in os.listdir(outdir):
-        m = re.match('legacy_limit_([a-z]*)_(\d+).root', fname)
+        m = re.match(rf'legacy_limit_([a-z]*)_{re.escape(distribution)}_(\d+)\.root', fname)  # raw string: \d and \. are regex escapes; distribution is matched literally
         if not m:
             continue
         category, year = m.groups()
-        files[year][category] = pjoin(outdir, fname)
+        files[category][year] = pjoin(outdir, fname)
 
-    outfile = r.TFile(pjoin(outdir, f'legacy_limit_vbf.root'),'RECREATE')
-    for year, ifiles in files.items():
-        for category, file in ifiles.items():
+    for category, ifiles in files.items():
+        outfile = r.TFile(pjoin(outdir, f'legacy_limit_{category}_{distribution}.root'),'RECREATE')
+        for year, file in ifiles.items():
             subdir = outfile.mkdir(f'category_{category}_{year}')
             infile = r.TFile(file)
             for key in infile.GetListOfKeys():
@@ -330,6 +333,6 @@ def merge_legacy_inputs(outdir):
                 h = key.ReadObj().Clone()
                 h.SetTitle(h.GetName())
                 h.SetDirectory(subdir)
-                h.GetXaxis().SetTitle('mjj')
+                h.GetXaxis().SetTitle(distribution)
                 suppress_negative_bins(h)
                 subdir.Write()
diff --git a/bucoffea/plot/util.py b/bucoffea/plot/util.py
index 77b6bd55b..58cfcab2b 100644
--- a/bucoffea/plot/util.py
+++ b/bucoffea/plot/util.py
@@ -3,6 +3,7 @@
 import shutil
 import random
 import re
+import math
 import csv
 import string
 
@@ -83,6 +84,26 @@ def rebin_histogram(h: hist.Hist, variable: str) -> hist.Hist:
     
     return h
 
+def rebin_particlenet_score(h):
+    """Rebin the "score" axis of h onto the edges 0, 0.1, 0.15, ..., 0.95, 1 and return the result.
+
+    New edges are the low edges of existing bins that match these targets within a
+    tolerance, presumably so they agree bit-for-bit with h's own binning (float precision).
+    """
+    xedges = [0] + list(np.linspace(0.1,1,19))
+    new_bins = []
+    i = 0
+    for ibin in h.identifiers("score"):
+        # abs_tol is needed: with the default abs_tol=0, isclose matches the target 0 only exactly
+        if i < len(xedges) and math.isclose(ibin.lo, xedges[i], abs_tol=1e-9):
+            new_bins.append(ibin.lo)
+            i += 1
+    # Append the high-end of the last bin
+    new_bins.append(1)
+    newax = hist.Bin("score", "Neural network score", new_bins)
+    return h.rebin("score", newax)
+
+
 def get_dataset_tag(dataset: str) -> str:
     mapping = {
         "MET_2017" : r"MET 2017",
@@ -642,4 +663,4 @@ def set_lumi_text(ax, year=None, mcscale=1, extratext='VBF', size=None):
 def set_cms_style(ax, text='$\\bf{CMS}$ internal', year=None, mcscale=1, extratext='VBF', size=None):
     ax_cosmetics(ax)
     set_cms_text(ax, text=text)
-    set_lumi_text(ax, year=year, mcscale=mcscale, extratext=extratext, size=size)
\ No newline at end of file
+    set_lumi_text(ax, year=year, mcscale=mcscale, extratext=extratext, size=size)

From 4920c56382c0030a85fc852b6361c5c683bdbbf5 Mon Sep 17 00:00:00 2001
From: anmalara <andrea.malara@cern.ch>
Date: Mon, 22 May 2023 15:56:50 +0200
Subject: [PATCH 43/43] update scripts

---
 bucoffea/Submit.sh      | 2 ++
 bucoffea/limit/steer.sh | 2 ++
 bucoffea/merge.sh       | 6 ++++++
 3 files changed, 10 insertions(+)
 create mode 100755 bucoffea/Submit.sh
 create mode 100755 bucoffea/limit/steer.sh
 create mode 100644 bucoffea/merge.sh

diff --git a/bucoffea/Submit.sh b/bucoffea/Submit.sh
new file mode 100755
index 000000000..2b826d43b
--- /dev/null
+++ b/bucoffea/Submit.sh
@@ -0,0 +1,2 @@
+# buexec -j1 vbfhinv submit --dataset '.*2018' --asynchronous --eventsperjob 5000000 --name "PFNANO_V9_17Feb23_PostNanoTools"
+buexec -j1 vbfhinv submit --dataset '.*2018' --asynchronous --filesperjob 1 --name "PFNANO_V9_17Feb23_PostNanoTools"
diff --git a/bucoffea/limit/steer.sh b/bucoffea/limit/steer.sh
new file mode 100755
index 000000000..1e0d8be32
--- /dev/null
+++ b/bucoffea/limit/steer.sh
@@ -0,0 +1,2 @@
+./limit.py ../merged_files/PFNANO_V9_17Feb23_PostNanoTools/  --years 2018
+./limit.py ../merged_files/PFNANO_V9_17Feb23_PostNanoTools/  --years 2018 --distribution mjj
diff --git a/bucoffea/merge.sh b/bucoffea/merge.sh
new file mode 100644
index 000000000..f2f008b50
--- /dev/null
+++ b/bucoffea/merge.sh
@@ -0,0 +1,6 @@
+# Copy the *coffea files of a submission into the merged_files directory, then run bumerge on it.
+input_dir="submission/PFNANO_V9_17Feb23_PostNanoTools/"
+output_dir="merged_files/PFNANO_V9_17Feb23_PostNanoTools/"
+mkdir -p "${output_dir}" &&
+cp "${input_dir}"*coffea "${output_dir}" &&
+bumerge "${output_dir}" -o "${output_dir}" -j 4