Commit

Merge pull request #16 from cmstas/scale-wgts

Scale wgts and systematics (muR/muF and ISR/FSR)

kmohrman authored Jan 25, 2024
2 parents bde7a11 + ff0b8d6 commit 8084e60
Showing 140 changed files with 6,815 additions and 5,552 deletions.
12 changes: 12 additions & 0 deletions analysis/wwz/make_sample_lst_json.py
@@ -333,6 +333,16 @@
},
}

# CI example
# Note: if using this to remake the json for the CI, replace the file in "files" with just output_1.root (no path), since the CI assumes the file will be downloaded locally
ci_dict = {
"UL17_WWZJetsTo4L2Nu_forCI" : {
"path" : "/cmsuf/data/store/user/kmohrman/test/ci_for_wwz",
"histAxisName": "UL17_WWZJetsTo4L2Nu",
"xsecName": "WWZ4l",
},
}
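# A minimal sketch of the manual step described in the note above (hypothetical
# snippet, not part of this commit): after producing the CI json, strip the
# paths so "files" holds just output_1.root:
#     jsn["files"] = [os.path.basename(f) for f in jsn["files"]]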


############################ Convenience function ############################

@@ -353,6 +363,7 @@ def make_jsons_for_dict_of_samples(samples_dict,prefix,year,out_dir,on_das=False
xsec_name = xsec_name,
hist_axis_name = hist_axis_name,
on_das = on_das,
include_lhe_wgts_arr = True,
)
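# Assumption (inferred from the CI json shown later in this diff): setting
# include_lhe_wgts_arr=True is what writes the "nSumOfLheWeights" list that
# run_wwz4l.py checks for when running with systematics.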
out_name = sample_name+".json"
if not os.path.exists(out_name):
@@ -375,6 +386,7 @@ def main():

# A simple example
#make_jsons_for_dict_of_samples(test_wwz_dict, "/ceph/cms/","2017",".",on_das=False) # An example
make_jsons_for_dict_of_samples(ci_dict, "","2017","../../input_samples/sample_jsons/test_samples/",on_das=False) # For CI json

# Specify output paths
jsons_path = "../../input_samples/sample_jsons/"
35 changes: 9 additions & 26 deletions analysis/wwz/run_wwz4l.py
@@ -15,19 +15,6 @@

LST_OF_KNOWN_EXECUTORS = ["futures","work_queue","iterative"]

WGT_VAR_LST = [
"nSumOfWeights_ISRUp",
"nSumOfWeights_ISRDown",
"nSumOfWeights_FSRUp",
"nSumOfWeights_FSRDown",
"nSumOfWeights_renormUp",
"nSumOfWeights_renormDown",
"nSumOfWeights_factUp",
"nSumOfWeights_factDown",
"nSumOfWeights_renormfactUp",
"nSumOfWeights_renormfactDown",
]

if __name__ == '__main__':

parser = argparse.ArgumentParser(description='You can customize your run')
@@ -100,9 +87,9 @@
port.append(port[0])

# Figure out which hists to include
if args.hist_list == ["ana"]:
# Here we hardcode a list of hists used for the analysis
hist_lst = ["njets","lj0pt","ptz"]
if args.hist_list == ["few"]:
# Here we hardcode a reduced list of a few hists
hist_lst = ["j0pt", "njets", "nbtagsl", "nleps", "met", "l0pt"]
elif args.hist_list == ["cr"]:
# Here we hardcode a list of hists used for the CRs
hist_lst = ["lj0pt", "ptz", "met", "ljptsum", "l0pt", "l0eta", "l1pt", "l1eta", "j0pt", "j0eta", "njets", "nbtagsl", "invmass"]
@@ -177,13 +164,10 @@ def LoadJsonToSampleName(jsonFile, prefix):
samplesdict[sname]['nGenEvents'] = int(samplesdict[sname]['nGenEvents'])
samplesdict[sname]['nSumOfWeights'] = float(samplesdict[sname]['nSumOfWeights'])
if not samplesdict[sname]["isData"]:
for wgt_var in WGT_VAR_LST:
# Check that MC samples have all needed weight sums (only needed if doing systs)
if do_systs:
if (wgt_var not in samplesdict[sname]):
raise Exception(f"Missing weight variation \"{wgt_var}\".")
else:
samplesdict[sname][wgt_var] = float(samplesdict[sname][wgt_var])
# Check that MC samples have all needed weight sums (only needed if doing systs)
if do_systs:
if ("nSumOfLheWeights" not in samplesdict[sname]):
raise Exception(f"Sample is missing scale variations: {sname}")
# Print file info
print('>> '+sname)
print(' - isData? : %s' %('YES' if samplesdict[sname]['isData'] else 'NO'))
@@ -196,9 +180,8 @@ def LoadJsonToSampleName(jsonFile, prefix):
print(' - nGenEvents : %i' %samplesdict[sname]['nGenEvents'])
print(' - SumWeights : %i' %samplesdict[sname]['nSumOfWeights'])
if not samplesdict[sname]["isData"]:
for wgt_var in WGT_VAR_LST:
if wgt_var in samplesdict[sname]:
print(f' - {wgt_var}: {samplesdict[sname][wgt_var]}')
if "nSumOfLheWeights" in samplesdict[sname]:
print(f' - nSumOfLheWeights : {samplesdict[sname]["nSumOfLheWeights"]}')
print(' - Prefix : %s' %samplesdict[sname]['redirector'])
print(' - nFiles : %i' %len(samplesdict[sname]['files']))
for fname in samplesdict[sname]['files']: print(' %s'%fname)
73 changes: 43 additions & 30 deletions analysis/wwz/wwz4l.py
@@ -145,8 +145,6 @@ def columns(self):
# Main function: run on a given dataset
def process(self, events):

TMPdosys = 0 # Temporary stand-in flag for now (eventually just use self._do_systematics)

# Dataset parameters
dataset = events.metadata["dataset"]

@@ -158,30 +156,32 @@ def process(self, events):

# Get up down weights from input dict
if (self._do_systematics and not isData):
# Otherwise we have an NLO xsec, so for these systs we will have e.g. xsec_NLO*(N_pass_up/N_gen_up)
lhe_sow = self._samples[dataset]["nSumOfLheWeights"]
# This assumes we have an NLO xsec, so for these systs we will have e.g. xsec_NLO*(N_pass_up/N_gen_up)
# Thus these systs should only affect acceptance, efficiency, and shape
# The uncertainty on the xsec comes from NLO and is applied as a rate uncertainty in the text datacard
sow_ISRUp = self._samples[dataset]["nSumOfWeights_ISRUp" ]
sow_ISRDown = self._samples[dataset]["nSumOfWeights_ISRDown" ]
sow_FSRUp = self._samples[dataset]["nSumOfWeights_FSRUp" ]
sow_FSRDown = self._samples[dataset]["nSumOfWeights_FSRDown" ]
sow_renormUp = self._samples[dataset]["nSumOfWeights_renormUp" ]
sow_renormDown = self._samples[dataset]["nSumOfWeights_renormDown" ]
sow_factUp = self._samples[dataset]["nSumOfWeights_factUp" ]
sow_factDown = self._samples[dataset]["nSumOfWeights_factDown" ]
sow_renormfactUp = self._samples[dataset]["nSumOfWeights_renormfactUp" ]
sow_renormfactDown = self._samples[dataset]["nSumOfWeights_renormfactDown" ]
if lhe_sow == []:
sow_renormDown = sow
sow_factDown = sow
sow_factUp = sow
sow_renormUp = sow
elif len(lhe_sow) == 9:
sow_renormDown = lhe_sow[1]
sow_factDown = lhe_sow[3]
sow_factUp = lhe_sow[5]
sow_renormUp = lhe_sow[7]
elif len(lhe_sow) == 8:
sow_renormDown = lhe_sow[1]
sow_factDown = lhe_sow[3]
sow_factUp = lhe_sow[4]
sow_renormUp = lhe_sow[6]
else: raise Exception(f"ERROR: Unknown LHE weights length {len(lhe_sow)}")
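# Assumed index convention (standard NanoAOD LHEScaleWeight ordering; not spelled
# out in this commit): 9 entries span (muR,muF) over {0.5,1,2}x{0.5,1,2}, so
# index 1=(0.5,1), 3=(1,0.5), 4=(1,1), 5=(1,2), 7=(2,1). With 8 entries the
# nominal (1,1) is dropped and indices above 3 shift down by one, giving the
# 4=(1,2) and 6=(2,1) picks used above.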
else:
sow_ISRUp = -1
sow_ISRDown = -1
sow_FSRUp = -1
sow_FSRDown = -1
sow_renormUp = -1
sow_renormDown = -1
sow_factUp = -1
sow_factDown = -1
sow_renormfactUp = -1
sow_renormfactDown = -1


datasets = ["SingleMuon", "SingleElectron", "EGamma", "MuonEG", "DoubleMuon", "DoubleElectron", "DoubleEG"]
for d in datasets:
@@ -286,28 +286,41 @@ def process(self, events):
weights_obj_base.add("norm",(xsec/sow)*genw*lumi*sm_wgt)


# Set up the list of systematics that are handled via event weight variations
wgt_correction_syst_lst = [
"btagSFlight_correlated", "btagSFbc_correlated", f"btagSFlight_uncorrelated_{year}", f"btagSFbc_uncorrelated_{year}",
"lepSF_elec", "lepSF_muon", "PreFiring", "PU"
]
wgt_correction_syst_lst = append_up_down_to_sys_base(wgt_correction_syst_lst)

if not isData:
# Scale weights
cor_tc.AttachPSWeights(events)
cor_tc.AttachScaleWeights(events)
# FSR/ISR weights
# For now only consider variations in the numerator
weights_obj_base.add('ISR', events.nom, events.ISRUp, events.ISRDown)
weights_obj_base.add('FSR', events.nom, events.FSRUp, events.FSRDown)
# Renorm/fact scale
weights_obj_base.add('renorm', events.nom, events.renormUp*(sow/sow_renormUp), events.renormDown*(sow/sow_renormDown))
weights_obj_base.add('fact', events.nom, events.factUp*(sow/sow_factUp), events.factDown*(sow/sow_factDown))
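# Note: the (sow/sow_renormUp) and (sow/sow_factUp) factors renormalize each
# varied weight to the nominal sum of weights, so (as noted above) these
# variations move only acceptance, efficiency, and shape while the overall
# rate stays pinned to the NLO xsec.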

# Misc other experimental SFs and systs
weights_obj_base.add('PreFiring', events.L1PreFiringWeight.Nom, events.L1PreFiringWeight.Up, events.L1PreFiringWeight.Dn)
weights_obj_base.add('PU', cor_tc.GetPUSF((events.Pileup.nTrueInt), year), cor_tc.GetPUSF(events.Pileup.nTrueInt, year, 'up'), cor_tc.GetPUSF(events.Pileup.nTrueInt, year, 'down'))

# Lepton SFs and systs
weights_obj_base.add("lepSF_muon", events.sf_4l_muon, copy.deepcopy(events.sf_4l_hi_muon), copy.deepcopy(events.sf_4l_lo_muon))
weights_obj_base.add("lepSF_elec", events.sf_4l_elec, copy.deepcopy(events.sf_4l_hi_elec), copy.deepcopy(events.sf_4l_lo_elec))


# Set up the list of systematics that are handled via event weight variations
wgt_correction_syst_lst = [
"btagSFlight_correlated", "btagSFbc_correlated", f"btagSFlight_uncorrelated_{year}", f"btagSFbc_uncorrelated_{year}",
"lepSF_elec", "lepSF_muon", "PreFiring", "PU",
"renorm", "fact", "ISR", "FSR",
]
wgt_correction_syst_lst = append_up_down_to_sys_base(wgt_correction_syst_lst)
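# Presumably (helper defined elsewhere in this repo) this expands each base
# name into its Up/Down variations, e.g. "renorm" -> "renormUp", "renormDown".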


######### The rest of the processor is inside this loop over systs that affect object kinematics ###########

obj_correction_systs = [] # Will have e.g. jes etc

# If we're doing systematics and this isn't data, we will loop over the obj correction syst lst list
if TMPdosys and not isData: obj_corr_syst_var_list = ["nominal"] + obj_correction_systs
if self._do_systematics and not isData: obj_corr_syst_var_list = ["nominal"] + obj_correction_systs
# Otherwise loop just once, for nominal
else: obj_corr_syst_var_list = ['nominal']

@@ -391,7 +404,7 @@ def process(self, events):
weights_obj_base_for_kinematic_syst.add("btagSF", wgt_btag_nom)

# Put the btagging up and down weight variations into the weights object
if TMPdosys:
if self._do_systematics:
for btag_sys in ["correlated", "uncorrelated"]:
year_tag = f"_{year}"
if btag_sys == "correlated": year_tag = ""
@@ -767,7 +780,7 @@ def process(self, events):
# Set up the list of weight fluctuations to loop over
# For now the systs do not depend on the category, so we can figure this out outside of the filling loop
wgt_var_lst = ["nominal"]
if TMPdosys:
if self._do_systematics:
if not isData:
if (obj_corr_syst_var != "nominal"):
# In this case, we are dealing with systs that change the kinematics of the objs (e.g. JES)
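For orientation, a minimal, self-contained sketch (not code from this commit; names and values are illustrative) of the coffea Weights pattern used above, where each add() with up/down arrays registers "<name>Up"/"<name>Down" modifiers:

    from coffea.analysis_tools import Weights
    import numpy as np

    n_events = 10                    # illustrative event count
    weights = Weights(n_events)
    nom = np.ones(n_events)          # stand-in for the nominal event weight
    # Registering a variation stores "renormUp"/"renormDown" modifiers
    weights.add("renorm", nom, weightUp=nom*1.1, weightDown=nom*0.9)
    total_nom = weights.weight()             # product of all nominal weights
    total_up  = weights.weight("renormUp")   # same, with renorm shifted up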
@@ -1,15 +1,26 @@
{
"xsec": 0.002067,
"year": "2017",
"treeName": "Events",
"histAxisName": "UL17_WWZJetsTo4L2Nu",
"options": "",
"WCnames": [],
"files": [
"output_1.root"
],
"nEvents": 54868,
"nGenEvents": 109760,
"nSumOfWeights": 227.15914198199997,
"isData": false
"xsec": 0.002067,
"year": "2017",
"treeName": "Events",
"histAxisName": "UL17_WWZJetsTo4L2Nu",
"options": "",
"WCnames": [],
"files": [
"output_1.root"
],
"nEvents": 54868,
"nGenEvents": 109760,
"nSumOfWeights": 227.15914198199997,
"isData": false,
"path": "/cmsuf/data/store/user/kmohrman/test/ci_for_wwz",
"nSumOfLheWeights": [
240.5168487561945,
236.51634455287,
233.27592728014238,
230.91358444751938,
224.05776881139798,
222.95829859531798,
219.4090775656835,
216.425230687383
]
}