From 991a3026fdbbc00a062ccf91b506ab2ebb518a19 Mon Sep 17 00:00:00 2001
From: Kelci Mohrman <kmohrman@nd.edu>
Date: Fri, 16 Dec 2022 15:51:09 -0500
Subject: [PATCH 1/5] Update the brittle workaround to handle the dropped convs
 rate syst

---
 analysis/topEFT/make_cr_and_sr_plots.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py
index a1eb580ea..56a4ed1bf 100644
--- a/analysis/topEFT/make_cr_and_sr_plots.py
+++ b/analysis/topEFT/make_cr_and_sr_plots.py
@@ -142,7 +142,7 @@
 
 # Some of our processes do not have rate systs split into qcd and pdf, so we just list them under qcd
 # This list keeps track of those, so we can handle them when extracting the numbers from the rate syst json
-PROC_WITH_JUST_QCD_RATE_SYST = ["tttt","ttll","ttlnu","Triboson","tWZ"]
+PROC_WITHOUT_PDF_RATE_SYST = ["tttt","ttll","ttlnu","Triboson","tWZ","convs"]
 
 yt = YieldTools()
 
@@ -235,7 +235,7 @@ def get_correlation_tag(uncertainty_name,proc_name,sample_group_map):
     if uncertainty_name in ["qcd_scale","pdf_scale"]:
         if proc_name_in_json is not None:
             if proc_name_in_json == "convs":
-                # Special case for conversions since we estimate from LO sample, we have _only_ pdf key not qcd
+                # Special case for conversions since we estimate from LO sample, we do not have qcd uncty
                 # Would be better to handle this in a more general way
                 corr_tag = None
             else:
@@ -261,7 +261,7 @@ def get_rate_systs(sample_name,sample_group_map):
 
     # Get the scale uncty from the json (if there is not an uncertainty for this sample, return 1 since the uncertainties are multiplicative)
     if scale_name_for_json is not None:
-        if scale_name_for_json in PROC_WITH_JUST_QCD_RATE_SYST:
+        if scale_name_for_json in PROC_WITHOUT_PDF_RATE_SYST:
             # Special cases for when we do not have a pdf uncty (this is a really brittle workaround)
             # NOTE Someday should fix this, it's a really hardcoded and brittle and bad workaround
             pdf_uncty = [1.0,1,0]

From 972875d9b93c45d531ca82ffa12c4be846c604dc Mon Sep 17 00:00:00 2001
From: Kelci Mohrman <kmohrman@nd.edu>
Date: Fri, 16 Dec 2022 19:49:44 -0500
Subject: [PATCH 2/5] Some preliminary code for renormfac decorrelation, to be
 continued..

---
 analysis/topEFT/make_cr_and_sr_plots.py | 58 ++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py
index 56a4ed1bf..252721dc8 100644
--- a/analysis/topEFT/make_cr_and_sr_plots.py
+++ b/analysis/topEFT/make_cr_and_sr_plots.py
@@ -321,6 +321,8 @@ def get_rate_syst_arrs(base_histo,proc_group_map):
 # Wrapper for getting plus and minus shape arrs
 def get_shape_syst_arrs(base_histo):
 
+    print("\n\nHere in get_shape_syst_arrs")
+
     # Get the list of systematic base names (i.e. without the up and down tags)
     # Assumes each syst has a "systnameUp" and a "systnameDown" category on the systematic axis
     syst_var_lst = []
@@ -331,14 +333,63 @@ def get_shape_syst_arrs(base_histo):
             if syst_name_base not in syst_var_lst:
                 syst_var_lst.append(syst_name_base)
 
+    print("syst_var_lst",syst_var_lst)
+
     # Sum each systematic's contribtuions for all samples together (e.g. the ISR for all samples is summed linearly)
     p_arr_rel_lst = []
     m_arr_rel_lst = []
     for syst_name in syst_var_lst:
+
+        if syst_name != "renormfact":continue
+
         relevant_samples_lst = yt.get_cat_lables(base_histo.integrate("systematic",syst_name+"Up"), "sample") # The samples relevant to this syst
+        print("\t",syst_name)
+        print("\t",relevant_samples_lst)
         n_arr     = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()]        # Sum of all samples for nominal variation
         u_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Up").values()[()]   # Sum of all samples for up variation
         d_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Down").values()[()] # Sum of all samples for down variation
+
+        print("n_arr",n_arr)
+        print("sum u",u_arr_sum)
+        print("sum d",d_arr_sum)
+
+        # Uncorrelate renorm and fact across the processes (though leave processes in groups like dibosons correlated to be consistent with SR)
+        if syst_name == "renormfact": # TODO Change to renorm and fact once new pkl file is done
+            u_arr_sum = np.zeros_like(n_arr) # Initialize to 0
+            d_arr_sum = np.zeros_like(n_arr) # Initialize to 0
+
+            #for proc_grp in CR_GRP_MAP.keys():
+            for proc_grp in ["test"]:
+                #proc_lst = CR_GRP_MAP[proc_grp]
+                #if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renormfact not relevant here
+                #if proc_lst == []: continue
+
+                proc_lst = relevant_samples_lst
+
+                # We'll keep all signal processes as uncorrelated, as in SR
+                if proc_grp == "Signal":
+                    for proc_name in proc_lst:
+                        if proc_name not in relevant_samples_lst: continue
+                        u_arr = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()]
+                        d_arr = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()]
+                        u_arr_sum += u_arr*u_arr
+                        d_arr_sum += d_arr*d_arr
+                # Otherwise corrleated across groups (e.g. ZZ and WZ, as in SR)
+                else:
+                    u_arr = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()]
+                    d_arr = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()]
+                    u_arr_sum += u_arr*u_arr
+                    d_arr_sum += d_arr*d_arr
+
+            # Before we move on, need to sqrt the outcome since later we'll square before adding in quadrature with other systs
+            print("pTHIS!",u_arr_sum)
+            print("pTHIS!",d_arr_sum)
+            u_arr_sum = np.sqrt(u_arr_sum)
+            d_arr_sum = np.sqrt(d_arr_sum)
+            print("THIS!",u_arr_sum)
+            print("THIS!",d_arr_sum)
+
+
         u_arr_rel = u_arr_sum - n_arr # Diff with respect to nominal
         d_arr_rel = d_arr_sum - n_arr # Diff with respect to nominal
         p_arr_rel = np.where(u_arr_rel>0,u_arr_rel,d_arr_rel) # Just the ones that increase the yield
@@ -346,6 +397,8 @@ def get_shape_syst_arrs(base_histo):
         p_arr_rel_lst.append(p_arr_rel*p_arr_rel) # Square each element in the arr and append the arr to the out list
         m_arr_rel_lst.append(m_arr_rel*m_arr_rel) # Square each element in the arr and append the arr to the out list
 
+    print("done")
+    exit()
     return [sum(m_arr_rel_lst), sum(p_arr_rel_lst)]
 
 # Get the squared arr for the jet dependent syst (e.g. for diboson jet dependent syst)
@@ -1011,10 +1064,10 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_
 
     # Loop over hists and make plots
     skip_lst = [] # Skip these hists
-    #skip_wlst = ["njets"] # Skip all but these hists
+    skip_wlst = ["njets","lj0pt"] # Skip all but these hists
     for idx,var_name in enumerate(dict_of_hists.keys()):
         if (var_name in skip_lst): continue
-        #if (var_name not in skip_wlst): continue
+        if (var_name not in skip_wlst): continue
         if (var_name == "njets"):
             # We do not keep track of jets in the sparse axis for the njets hists
             cr_cat_dict = get_dict_with_stripped_bin_names(CR_CHAN_DICT,"njets")
@@ -1037,6 +1090,7 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_
 
         # Loop over the CR categories
         for hist_cat in cr_cat_dict.keys():
+            if hist_cat != "cr_3l": continue
             if (hist_cat == "cr_2los_Z" and (("j0" in var_name) and ("lj0pt" not in var_name))): continue # The 2los Z category does not require jets (so leading jet plots do not make sense)
             if (hist_cat == "cr_2lss_flip" and (("j0" in var_name) and ("lj0pt" not in var_name))): continue # The flip category does not require jets (so leading jet plots do not make sense)
             print("\n\tCategory:",hist_cat)

From dcf5dfe50b34d85650efcf7839225f89f77c05c7 Mon Sep 17 00:00:00 2001
From: Kelci Mohrman <kmohrman@nd.edu>
Date: Sun, 18 Dec 2022 14:43:06 -0500
Subject: [PATCH 3/5] Relative error decorrelated across groups, except signal

---
 analysis/topEFT/make_cr_and_sr_plots.py | 109 +++++++++++++++---------
 1 file changed, 71 insertions(+), 38 deletions(-)

diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py
index 252721dc8..5b740de4c 100644
--- a/analysis/topEFT/make_cr_and_sr_plots.py
+++ b/analysis/topEFT/make_cr_and_sr_plots.py
@@ -340,64 +340,97 @@ def get_shape_syst_arrs(base_histo):
     m_arr_rel_lst = []
     for syst_name in syst_var_lst:
 
-        if syst_name != "renormfact":continue
-
         relevant_samples_lst = yt.get_cat_lables(base_histo.integrate("systematic",syst_name+"Up"), "sample") # The samples relevant to this syst
+        n_arr     = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation
+
+        #if syst_name != "renormfact":continue
         print("\t",syst_name)
         print("\t",relevant_samples_lst)
-        n_arr     = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()]        # Sum of all samples for nominal variation
-        u_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Up").values()[()]   # Sum of all samples for up variation
-        d_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Down").values()[()] # Sum of all samples for down variation
 
-        print("n_arr",n_arr)
-        print("sum u",u_arr_sum)
-        print("sum d",d_arr_sum)
-
-        # Uncorrelate renorm and fact across the processes (though leave processes in groups like dibosons correlated to be consistent with SR)
+        # Special handling of renorm and fact
+        # Uncorrelate these systs across the processes (though leave processes in groups like dibosons correlated to be consistent with SR)
         if syst_name == "renormfact": # TODO Change to renorm and fact once new pkl file is done
-            u_arr_sum = np.zeros_like(n_arr) # Initialize to 0
-            d_arr_sum = np.zeros_like(n_arr) # Initialize to 0
+            a_arr_sum = np.zeros_like(n_arr) # Initialize to 0
+
+            for k,v in CR_GRP_MAP.items():
+                print("\n",k)
+                for s in v:
+                    if s in relevant_samples_lst: print(s)
 
-            #for proc_grp in CR_GRP_MAP.keys():
-            for proc_grp in ["test"]:
-                #proc_lst = CR_GRP_MAP[proc_grp]
-                #if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renormfact not relevant here
-                #if proc_lst == []: continue
+            # Loop over the groups of processes, generally the processes in the groups will be correlated and the different groups will be uncorrelated
+            #for proc_grp in ["test"]:
+            for proc_grp in CR_GRP_MAP.keys():
+                proc_lst = CR_GRP_MAP[proc_grp]
+                if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renormfact not relevant here
+                if proc_lst == []: continue
 
-                proc_lst = relevant_samples_lst
+                #proc_lst = relevant_samples_lst
 
                 # We'll keep all signal processes as uncorrelated, as in SR
                 if proc_grp == "Signal":
                     for proc_name in proc_lst:
                         if proc_name not in relevant_samples_lst: continue
-                        u_arr = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()]
-                        d_arr = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()]
-                        u_arr_sum += u_arr*u_arr
-                        d_arr_sum += d_arr*d_arr
+
+                        n_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic","nominal").values()[()]
+                        u_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()]
+                        d_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()]
+
+                        u_arr_proc_rel = u_arr_proc - n_arr_proc
+                        d_arr_proc_rel = d_arr_proc - n_arr_proc
+                        a_arr_proc_rel = (abs(u_arr_proc_rel) + abs(d_arr_proc_rel))/2.0
+
+                        #print("\n\nAPPENDING!!!",proc_name,a_arr_proc_rel*a_arr_proc_rel)
+                        print("\n\nAPPENDING!!!",proc_name,a_arr_proc_rel)
+                        a_arr_sum += a_arr_proc_rel*a_arr_proc_rel
+
                 # Otherwise corrleated across groups (e.g. ZZ and WZ, as in SR)
                 else:
-                    u_arr = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()]
-                    d_arr = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()]
-                    u_arr_sum += u_arr*u_arr
-                    d_arr_sum += d_arr*d_arr
+                    n_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic","nominal").values()[()]
+                    u_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()]
+                    d_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()]
+                    u_arr_grp_rel = u_arr_grp - n_arr_grp
+                    d_arr_grp_rel = d_arr_grp - n_arr_grp
+                    a_arr_grp_rel = (abs(u_arr_grp_rel) + abs(d_arr_grp_rel))/2.0
+
+                    a_arr_sum += a_arr_grp_rel*a_arr_grp_rel
 
             # Before we move on, need to sqrt the outcome since later we'll square before adding in quadrature with other systs
-            print("pTHIS!",u_arr_sum)
-            print("pTHIS!",d_arr_sum)
-            u_arr_sum = np.sqrt(u_arr_sum)
-            d_arr_sum = np.sqrt(d_arr_sum)
-            print("THIS!",u_arr_sum)
-            print("THIS!",d_arr_sum)
-
-
-        u_arr_rel = u_arr_sum - n_arr # Diff with respect to nominal
-        d_arr_rel = d_arr_sum - n_arr # Diff with respect to nominal
-        p_arr_rel = np.where(u_arr_rel>0,u_arr_rel,d_arr_rel) # Just the ones that increase the yield
-        m_arr_rel = np.where(u_arr_rel<0,u_arr_rel,d_arr_rel) # Just the ones that decrease the yield
+            p_arr_rel =  np.sqrt(a_arr_sum)
+            m_arr_rel = -np.sqrt(a_arr_sum)
+
+
+        # If the syst is not renorm or fact, just treat it normally (correlate across all processes)
+        else:
+            u_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Up").values()[()]   # Sum of all samples for up variation
+            d_arr_sum = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic",syst_name+"Down").values()[()] # Sum of all samples for down variation
+
+            u_arr_rel = u_arr_sum - n_arr # Diff with respect to nominal
+            d_arr_rel = d_arr_sum - n_arr # Diff with respect to nominal
+            p_arr_rel = np.where(u_arr_rel>0,u_arr_rel,d_arr_rel) # Just the ones that increase the yield
+            m_arr_rel = np.where(u_arr_rel<0,u_arr_rel,d_arr_rel) # Just the ones that decrease the yield
+            print("n_arr",n_arr)
+            print("u_arr_sum",u_arr_sum)
+            print("d_arr_sum",d_arr_sum)
+            print("m rel",m_arr_rel)
+            print("p rel",p_arr_rel)
+
+        print("\nEND:")
+        print("p_arr_rel",p_arr_rel)
+        print("m_arr_rel",m_arr_rel)
+
+        # Square and append this syst to the return lists
         p_arr_rel_lst.append(p_arr_rel*p_arr_rel) # Square each element in the arr and append the arr to the out list
         m_arr_rel_lst.append(m_arr_rel*m_arr_rel) # Square each element in the arr and append the arr to the out list
 
     print("done")
+
+    print("\np_arr_rel_lst",p_arr_rel_lst)
+    print("\nm_arr_rel_lst",m_arr_rel_lst)
+
+    print("\n\nRETURNING")
+    print("\np",sum(p_arr_rel_lst))
+    print("\nm",sum(m_arr_rel_lst))
+
     exit()
     return [sum(m_arr_rel_lst), sum(p_arr_rel_lst)]
 

From 9fd8cab719a5bab4dcdf874a03788c188f0e7232 Mon Sep 17 00:00:00 2001
From: Kelci Mohrman <kmohrman@nd.edu>
Date: Sun, 18 Dec 2022 15:08:18 -0500
Subject: [PATCH 4/5] Clean up and move new renorm fact stuff to a function

---
 analysis/topEFT/make_cr_and_sr_plots.py | 130 +++++++++++-------------
 1 file changed, 61 insertions(+), 69 deletions(-)

diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py
index 5b740de4c..51a5bb04a 100644
--- a/analysis/topEFT/make_cr_and_sr_plots.py
+++ b/analysis/topEFT/make_cr_and_sr_plots.py
@@ -321,8 +321,6 @@ def get_rate_syst_arrs(base_histo,proc_group_map):
 # Wrapper for getting plus and minus shape arrs
 def get_shape_syst_arrs(base_histo):
 
-    print("\n\nHere in get_shape_syst_arrs")
-
     # Get the list of systematic base names (i.e. without the up and down tags)
     # Assumes each syst has a "systnameUp" and a "systnameDown" category on the systematic axis
     syst_var_lst = []
@@ -333,71 +331,18 @@ def get_shape_syst_arrs(base_histo):
             if syst_name_base not in syst_var_lst:
                 syst_var_lst.append(syst_name_base)
 
-    print("syst_var_lst",syst_var_lst)
-
     # Sum each systematic's contribtuions for all samples together (e.g. the ISR for all samples is summed linearly)
     p_arr_rel_lst = []
     m_arr_rel_lst = []
     for syst_name in syst_var_lst:
 
         relevant_samples_lst = yt.get_cat_lables(base_histo.integrate("systematic",syst_name+"Up"), "sample") # The samples relevant to this syst
-        n_arr     = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation
-
-        #if syst_name != "renormfact":continue
-        print("\t",syst_name)
-        print("\t",relevant_samples_lst)
+        n_arr = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation
 
         # Special handling of renorm and fact
         # Uncorrelate these systs across the processes (though leave processes in groups like dibosons correlated to be consistent with SR)
         if syst_name == "renormfact": # TODO Change to renorm and fact once new pkl file is done
-            a_arr_sum = np.zeros_like(n_arr) # Initialize to 0
-
-            for k,v in CR_GRP_MAP.items():
-                print("\n",k)
-                for s in v:
-                    if s in relevant_samples_lst: print(s)
-
-            # Loop over the groups of processes, generally the processes in the groups will be correlated and the different groups will be uncorrelated
-            #for proc_grp in ["test"]:
-            for proc_grp in CR_GRP_MAP.keys():
-                proc_lst = CR_GRP_MAP[proc_grp]
-                if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renormfact not relevant here
-                if proc_lst == []: continue
-
-                #proc_lst = relevant_samples_lst
-
-                # We'll keep all signal processes as uncorrelated, as in SR
-                if proc_grp == "Signal":
-                    for proc_name in proc_lst:
-                        if proc_name not in relevant_samples_lst: continue
-
-                        n_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic","nominal").values()[()]
-                        u_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()]
-                        d_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()]
-
-                        u_arr_proc_rel = u_arr_proc - n_arr_proc
-                        d_arr_proc_rel = d_arr_proc - n_arr_proc
-                        a_arr_proc_rel = (abs(u_arr_proc_rel) + abs(d_arr_proc_rel))/2.0
-
-                        #print("\n\nAPPENDING!!!",proc_name,a_arr_proc_rel*a_arr_proc_rel)
-                        print("\n\nAPPENDING!!!",proc_name,a_arr_proc_rel)
-                        a_arr_sum += a_arr_proc_rel*a_arr_proc_rel
-
-                # Otherwise corrleated across groups (e.g. ZZ and WZ, as in SR)
-                else:
-                    n_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic","nominal").values()[()]
-                    u_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()]
-                    d_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()]
-                    u_arr_grp_rel = u_arr_grp - n_arr_grp
-                    d_arr_grp_rel = d_arr_grp - n_arr_grp
-                    a_arr_grp_rel = (abs(u_arr_grp_rel) + abs(d_arr_grp_rel))/2.0
-
-                    a_arr_sum += a_arr_grp_rel*a_arr_grp_rel
-
-            # Before we move on, need to sqrt the outcome since later we'll square before adding in quadrature with other systs
-            p_arr_rel =  np.sqrt(a_arr_sum)
-            m_arr_rel = -np.sqrt(a_arr_sum)
-
+            p_arr_rel,m_arr_rel = get_decorrelated_uncty(syst_name,CR_GRP_MAP,relevant_samples_lst,base_histo,n_arr)
 
         # If the syst is not renorm or fact, just treat it normally (correlate across all processes)
         else:
@@ -408,22 +353,11 @@ def get_shape_syst_arrs(base_histo):
             d_arr_rel = d_arr_sum - n_arr # Diff with respect to nominal
             p_arr_rel = np.where(u_arr_rel>0,u_arr_rel,d_arr_rel) # Just the ones that increase the yield
             m_arr_rel = np.where(u_arr_rel<0,u_arr_rel,d_arr_rel) # Just the ones that decrease the yield
-            print("n_arr",n_arr)
-            print("u_arr_sum",u_arr_sum)
-            print("d_arr_sum",d_arr_sum)
-            print("m rel",m_arr_rel)
-            print("p rel",p_arr_rel)
-
-        print("\nEND:")
-        print("p_arr_rel",p_arr_rel)
-        print("m_arr_rel",m_arr_rel)
 
         # Square and append this syst to the return lists
         p_arr_rel_lst.append(p_arr_rel*p_arr_rel) # Square each element in the arr and append the arr to the out list
         m_arr_rel_lst.append(m_arr_rel*m_arr_rel) # Square each element in the arr and append the arr to the out list
 
-    print("done")
-
     print("\np_arr_rel_lst",p_arr_rel_lst)
     print("\nm_arr_rel_lst",m_arr_rel_lst)
 
@@ -431,9 +365,67 @@ def get_shape_syst_arrs(base_histo):
     print("\np",sum(p_arr_rel_lst))
     print("\nm",sum(m_arr_rel_lst))
 
-    exit()
     return [sum(m_arr_rel_lst), sum(p_arr_rel_lst)]
 
+
+# Special case for renorm and fact, as these are decorrelated across processes
+# Sorry to anyone who tries to read this in the future, this function is very ad hoc and messy and hard to follow
+# Just used in get_shape_syst_arrs()
+# Here are a few notes: 
+#   - This is complicated, so I just symmetrized the errors
+#   - The processes are generally correlated across groups (e.g. WZ and ZZ) since this is what's done in the datacard maker for the SR
+#   - So the grouping generally follows what's in the CR group map, excetp in the case of signal
+#       - Since in the SR all signal processes are uncorrelated for these systs, we also uncorrelate here
+#       - Note there are caviats to this:
+#           * In the SR, TTZToLL_M1to10 and TTToSemiLeptonic and TTTo2L2Nu are all grouped into ttll
+#           * Here in the CR TTZToLL_M1to10 is part of signal group, but TTToSemiLeptonic and TTTo2L2Nu are in their own ttbar group
+#           * So there are two differences with respect to how these processes are grouped in the SR: 
+#               1) Here TTToSemiLeptonic and TTTo2L2Nu are correlated with each other, but not with ttll
+#               2) Here TTZToLL_M1to10 is grouped as part of signal (as in SR) but here _all_ signal processes are uncorrleated so here TTZToLL_M1to10 is uncorrelated with ttll while in SR they would be correlated
+def get_decorrelated_uncty(syst_name,grp_map,relevant_samples_lst,base_histo,template_zeros_arr):
+
+    # Initialize the array we will return (ok technically we return sqrt of this arr squared..)
+    a_arr_sum = np.zeros_like(template_zeros_arr) # Just using this template_zeros_arr for its size
+
+    # Loop over the groups of processes, generally the processes in the groups will be correlated and the different groups will be uncorrelated
+    for proc_grp in grp_map.keys():
+        proc_lst = grp_map[proc_grp]
+        if proc_grp in ["Nonprompt","Flips","Data"]: continue # Renorm and fact not relevant here
+        if proc_lst == []: continue # Nothing here
+
+        # We'll keep all signal processes as uncorrelated, similar to what's done in SR
+        if proc_grp == "Signal":
+            for proc_name in proc_lst:
+                if proc_name not in relevant_samples_lst: continue
+
+                n_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic","nominal").values()[()]
+                u_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Up").values()[()]
+                d_arr_proc = base_histo.integrate("sample",proc_name).integrate("systematic",syst_name+"Down").values()[()]
+
+                u_arr_proc_rel = u_arr_proc - n_arr_proc
+                d_arr_proc_rel = d_arr_proc - n_arr_proc
+                a_arr_proc_rel = (abs(u_arr_proc_rel) + abs(d_arr_proc_rel))/2.0
+
+                a_arr_sum += a_arr_proc_rel*a_arr_proc_rel
+
+        # Otherwise corrleated across groups (e.g. ZZ and WZ, as datacard maker does in SR)
+        else:
+            n_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic","nominal").values()[()]
+            u_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Up").values()[()]
+            d_arr_grp = base_histo.integrate("sample",proc_lst).integrate("systematic",syst_name+"Down").values()[()]
+            u_arr_grp_rel = u_arr_grp - n_arr_grp
+            d_arr_grp_rel = d_arr_grp - n_arr_grp
+            a_arr_grp_rel = (abs(u_arr_grp_rel) + abs(d_arr_grp_rel))/2.0
+
+            a_arr_sum += a_arr_grp_rel*a_arr_grp_rel
+
+    # Before we move on, need to sqrt the outcome since later we'll square before adding in quadrature with other systs
+    p_arr_rel =  np.sqrt(a_arr_sum)
+    m_arr_rel = -np.sqrt(a_arr_sum)
+
+    return [p_arr_rel,m_arr_rel]
+
+
 # Get the squared arr for the jet dependent syst (e.g. for diboson jet dependent syst)
 def get_diboson_njets_syst_arr(njets_histo_vals_arr,bin0_njets):
 

From f017a99be9bb23c53852267d6847a63be905cc39 Mon Sep 17 00:00:00 2001
From: Kelci Mohrman <kmohrman@nd.edu>
Date: Sun, 18 Dec 2022 15:41:13 -0500
Subject: [PATCH 5/5] Clean up

---
 analysis/topEFT/make_cr_and_sr_plots.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/analysis/topEFT/make_cr_and_sr_plots.py b/analysis/topEFT/make_cr_and_sr_plots.py
index 51a5bb04a..bca542ea1 100644
--- a/analysis/topEFT/make_cr_and_sr_plots.py
+++ b/analysis/topEFT/make_cr_and_sr_plots.py
@@ -335,13 +335,15 @@ def get_shape_syst_arrs(base_histo):
     p_arr_rel_lst = []
     m_arr_rel_lst = []
     for syst_name in syst_var_lst:
+        # Skip the variation of renorm and fact together, since we're treating as independent
+        if syst_name == "renormfact": continue
 
         relevant_samples_lst = yt.get_cat_lables(base_histo.integrate("systematic",syst_name+"Up"), "sample") # The samples relevant to this syst
         n_arr = base_histo.integrate("sample",relevant_samples_lst).integrate("systematic","nominal").values()[()] # Sum of all samples for nominal variation
 
         # Special handling of renorm and fact
         # Uncorrelate these systs across the processes (though leave processes in groups like dibosons correlated to be consistent with SR)
-        if syst_name == "renormfact": # TODO Change to renorm and fact once new pkl file is done
+        if (syst_name == "renorm") or (syst_name == "fact"):
             p_arr_rel,m_arr_rel = get_decorrelated_uncty(syst_name,CR_GRP_MAP,relevant_samples_lst,base_histo,n_arr)
 
         # If the syst is not renorm or fact, just treat it normally (correlate across all processes)
@@ -358,13 +360,6 @@ def get_shape_syst_arrs(base_histo):
         p_arr_rel_lst.append(p_arr_rel*p_arr_rel) # Square each element in the arr and append the arr to the out list
         m_arr_rel_lst.append(m_arr_rel*m_arr_rel) # Square each element in the arr and append the arr to the out list
 
-    print("\np_arr_rel_lst",p_arr_rel_lst)
-    print("\nm_arr_rel_lst",m_arr_rel_lst)
-
-    print("\n\nRETURNING")
-    print("\np",sum(p_arr_rel_lst))
-    print("\nm",sum(m_arr_rel_lst))
-
     return [sum(m_arr_rel_lst), sum(p_arr_rel_lst)]
 
 
@@ -374,9 +369,9 @@ def get_shape_syst_arrs(base_histo):
 # Here are a few notes: 
 #   - This is complicated, so I just symmetrized the errors
 #   - The processes are generally correlated across groups (e.g. WZ and ZZ) since this is what's done in the datacard maker for the SR
-#   - So the grouping generally follows what's in the CR group map, excetp in the case of signal
+#   - So the grouping generally follows what's in the CR group map, except in the case of signal
 #       - Since in the SR all signal processes are uncorrelated for these systs, we also uncorrelate here
-#       - Note there are caviats to this:
+#       - Note there are caveats to this:
 #           * In the SR, TTZToLL_M1to10 and TTToSemiLeptonic and TTTo2L2Nu are all grouped into ttll
 #           * Here in the CR TTZToLL_M1to10 is part of signal group, but TTToSemiLeptonic and TTTo2L2Nu are in their own ttbar group
 #           * So there are two differences with respect to how these processes are grouped in the SR: 
@@ -1089,10 +1084,10 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_
 
     # Loop over hists and make plots
     skip_lst = [] # Skip these hists
-    skip_wlst = ["njets","lj0pt"] # Skip all but these hists
+    #skip_wlst = ["njets"] # Skip all but these hists
     for idx,var_name in enumerate(dict_of_hists.keys()):
         if (var_name in skip_lst): continue
-        if (var_name not in skip_wlst): continue
+        #if (var_name not in skip_wlst): continue
         if (var_name == "njets"):
             # We do not keep track of jets in the sparse axis for the njets hists
             cr_cat_dict = get_dict_with_stripped_bin_names(CR_CHAN_DICT,"njets")
@@ -1115,7 +1110,6 @@ def make_all_cr_plots(dict_of_hists,year,skip_syst_errs,unit_norm_bool,save_dir_
 
         # Loop over the CR categories
         for hist_cat in cr_cat_dict.keys():
-            if hist_cat != "cr_3l": continue
             if (hist_cat == "cr_2los_Z" and (("j0" in var_name) and ("lj0pt" not in var_name))): continue # The 2los Z category does not require jets (so leading jet plots do not make sense)
             if (hist_cat == "cr_2lss_flip" and (("j0" in var_name) and ("lj0pt" not in var_name))): continue # The flip category does not require jets (so leading jet plots do not make sense)
             print("\n\tCategory:",hist_cat)