Commit bd1fe5e

Merge branch 'master' of github.com:uhh-cms/hh2bbtautau into run3_master
haddadanas committed Jun 11, 2024
2 parents: 3aa7e28 + 8435c34
Showing 18 changed files with 425 additions and 851 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -37,3 +37,4 @@ data
.setups
.mypy_cache
.vscode
.python-version
675 changes: 2 additions & 673 deletions LICENSE

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion README.md
@@ -3,7 +3,6 @@
[![Lint and test](https://github.com/uhh-cms/hh2bbtautau/actions/workflows/lint_and_test.yaml/badge.svg)](https://github.com/uhh-cms/hh2bbtautau/actions/workflows/lint_and_test.yaml)
[![License](https://img.shields.io/github/license/uhh-cms/hh2bbtautau.svg)](https://github.com/uhh-cms/hh2bbtautau/blob/master/LICENSE)


### Quickstart

A couple of test tasks are listed below.
@@ -44,6 +43,12 @@ law run cf.CreateDatacards \
--workers 3
```

### Useful links

- [columnflow documentation](https://columnflow.readthedocs.io/en/latest/index.html)
- [Nano documentation](https://gitlab.cern.ch/cms-nanoAOD/nanoaod-doc)
- [Correctionlib files](https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration)
- [HLT info browser](https://cmshltinfo.app.cern.ch/path/HLT_MediumChargedIsoPFTau180HighPtRelaxedIso_Trk50_eta2p1_v)

### Development

7 changes: 3 additions & 4 deletions hbt/calibration/default.py
@@ -19,16 +19,15 @@

# derive calibrators to add settings
jec_full = jec.derive("jec_full", cls_dict={"mc_only": True, "nominal_only": True})
tec_full = tec.derive("tec_full", cls_dict={"nominal_only": True})


@calibrator(
uses={
mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec_full, deterministic_seeds,
mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec, deterministic_seeds,
IF_RUN_2(met_phi),
},
produces={
mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec_full, deterministic_seeds,
mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec, deterministic_seeds,
IF_RUN_2(met_phi),
},
)
@@ -48,7 +47,7 @@ def default(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:

if self.dataset_inst.is_mc:
if self.global_shift_inst.is_nominal:
events = self[tec_full](events, **kwargs)
events = self[tec](events, **kwargs)
else:
events = self[tec_nominal](events, **kwargs)

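
The `derive` call above creates a specialized subclass whose `cls_dict` entries override class-level attributes; the removal of `tec_full` in favor of the plain `tec` (now configurable via class-level flags, see `hbt/calibration/tau.py` below) relies on the same mechanism. A self-contained toy sketch of the pattern (illustrative only, not columnflow's actual implementation):

```python
class Derivable:
    """Toy stand-in for columnflow's derivable base class."""

    @classmethod
    def derive(cls, name, cls_dict=None):
        # create a subclass whose attributes shadow those of the parent
        return type(name, (cls,), dict(cls_dict or {}))


class ToyCalibrator(Derivable):
    # default settings, overridable per derived class
    mc_only = False
    nominal_only = False


# mirrors the jec_full derivation above
jec_full_toy = ToyCalibrator.derive("jec_full", cls_dict={"mc_only": True, "nominal_only": True})
assert jec_full_toy.mc_only and jec_full_toy.nominal_only
assert not ToyCalibrator.mc_only  # the base class is untouched
```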
151 changes: 89 additions & 62 deletions hbt/calibration/tau.py
@@ -24,19 +24,15 @@
uses={
# nano columns
"nTau", "Tau.pt", "Tau.eta", "Tau.phi", "Tau.mass", "Tau.charge", "Tau.genPartFlav",
"Tau.decayMode", "MET.pt", "MET.phi",
"Tau.decayMode",
},
produces={
"Tau.pt", "Tau.mass", "MET.pt", "MET.phi",
} | {
f"{field}_tec_{match}_dm{dm}_{direction}"
for field, match, dm, direction in itertools.product(
["Tau.pt", "Tau.mass", "MET.pt", "MET.phi"],
["jet", "e"],
[0, 1, 10, 11],
["up", "down"],
)
"Tau.pt", "Tau.mass",
},
# whether to also produce uncertainties
with_uncertainties=True,
# toggle for propagation to MET
propagate_met=True,
# only run on mc
mc_only=True,
# function to determine the correction file
@@ -76,16 +72,20 @@ def tec(

# get the scale factors for the four supported decay modes
dm_mask = (dm == 0) | (dm == 1) | (dm == 10) | (dm == 11)
scales_nom = np.ones_like(dm_mask, dtype=np.float32)
scales_up = np.ones_like(dm_mask, dtype=np.float32)
scales_down = np.ones_like(dm_mask, dtype=np.float32)

# prepare arguments for the correction tool
args = (pt[dm_mask], eta[dm_mask], dm[dm_mask], match[dm_mask], self.config_inst.x.tau_tagger)
if self.config_inst.campaign.x.run == 3:
args += self.config_inst.x.tau_energy_calibration

scales_nom = np.ones_like(dm_mask, dtype=np.float32)
scales_nom[dm_mask] = self.tec_corrector(*args, "nom")
scales_up[dm_mask] = self.tec_corrector(*args, "up")
scales_down[dm_mask] = self.tec_corrector(*args, "down")

if self.with_uncertainties:
scales_up = np.ones_like(dm_mask, dtype=np.float32)
scales_up[dm_mask] = self.tec_corrector(*args, "up")
scales_down = np.ones_like(dm_mask, dtype=np.float32)
scales_down[dm_mask] = self.tec_corrector(*args, "down")

# custom adjustment 1: reset where the matching value is unhandled
# custom adjustment 2: reset electrons faking taus where the pt is too small
@@ -95,42 +95,45 @@
# apply reset masks
mask = mask1 | mask2
scales_nom[mask] = 1.0
scales_up[mask] = 1.0
scales_down[mask] = 1.0
if self.with_uncertainties:
scales_up[mask] = 1.0
scales_down[mask] = 1.0

# create varied collections per decay mode
for (match_mask, match_name), _dm, (direction, scales) in itertools.product(
[(match == 5, "jet"), ((match == 1) | (match == 3), "e")],
[0, 1, 10, 11],
[("up", scales_up), ("down", scales_down)],
):
# copy pt and mass
pt_varied = ak_copy(events.Tau.pt)
mass_varied = ak_copy(events.Tau.mass)
pt_view = flat_np_view(pt_varied, axis=1)
mass_view = flat_np_view(mass_varied, axis=1)

# correct pt and mass for taus with that gen match and decay mode
mask = match_mask & (dm == _dm)
pt_view[mask] *= scales[mask]
mass_view[mask] *= scales[mask]

# propagate changes to MET
met_pt_varied, met_phi_varied = propagate_met(
events.Tau.pt,
events.Tau.phi,
pt_varied,
events.Tau.phi,
events.MET.pt,
events.MET.phi,
)

# save columns
postfix = f"tec_{match_name}_dm{_dm}_{direction}"
events = set_ak_column_f32(events, f"Tau.pt_{postfix}", pt_varied)
events = set_ak_column_f32(events, f"Tau.mass_{postfix}", mass_varied)
events = set_ak_column_f32(events, f"MET.pt_{postfix}", met_pt_varied)
events = set_ak_column_f32(events, f"MET.phi_{postfix}", met_phi_varied)
if self.with_uncertainties:
for (match_mask, match_name), _dm, (direction, scales) in itertools.product(
[(match == 5, "jet"), ((match == 1) | (match == 3), "e")],
[0, 1, 10, 11],
[("up", scales_up), ("down", scales_down)],
):
# copy pt and mass
pt_varied = ak_copy(events.Tau.pt)
mass_varied = ak_copy(events.Tau.mass)
pt_view = flat_np_view(pt_varied, axis=1)
mass_view = flat_np_view(mass_varied, axis=1)

# correct pt and mass for taus with that gen match and decay mode
mask = match_mask & (dm == _dm)
pt_view[mask] *= scales[mask]
mass_view[mask] *= scales[mask]

# save columns
postfix = f"tec_{match_name}_dm{_dm}_{direction}"
events = set_ak_column_f32(events, f"Tau.pt_{postfix}", pt_varied)
events = set_ak_column_f32(events, f"Tau.mass_{postfix}", mass_varied)

# propagate changes to MET
if self.propagate_met:
met_pt_varied, met_phi_varied = propagate_met(
events.Tau.pt,
events.Tau.phi,
pt_varied,
events.Tau.phi,
events.MET.pt,
events.MET.phi,
)
events = set_ak_column_f32(events, f"MET.pt_{postfix}", met_pt_varied)
events = set_ak_column_f32(events, f"MET.phi_{postfix}", met_phi_varied)

# apply the nominal correction
# note: changes are applied to the views and directly propagate to the original ak arrays
@@ -140,22 +143,46 @@ def tec(
mass *= scales_nom

# propagate changes to MET
met_pt, met_phi = propagate_met(
tau_sum_before.pt,
tau_sum_before.phi,
events.Tau.pt,
events.Tau.phi,
events.MET.pt,
events.MET.phi,
)

# save columns
events = set_ak_column_f32(events, "MET.pt", met_pt)
events = set_ak_column_f32(events, "MET.phi", met_phi)
if self.propagate_met:
met_pt, met_phi = propagate_met(
tau_sum_before.pt,
tau_sum_before.phi,
events.Tau.pt,
events.Tau.phi,
events.MET.pt,
events.MET.phi,
)
events = set_ak_column_f32(events, "MET.pt", met_pt)
events = set_ak_column_f32(events, "MET.phi", met_phi)

return events
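
Conceptually, this MET propagation replaces the pre-calibration tau transverse momentum sum by the calibrated one. A minimal numpy sketch of that balance (a hypothetical helper illustrating the assumed behavior, not the actual `propagate_met` implementation):

```python
import numpy as np

def propagate_met_sketch(pt_old, phi_old, pt_new, phi_new, met_pt, met_phi):
    # MET shifts by the negative of the net change of the calibrated objects:
    # add the old transverse vector sum back, then subtract the new one
    met_x = met_pt * np.cos(met_phi) + pt_old * np.cos(phi_old) - pt_new * np.cos(phi_new)
    met_y = met_pt * np.sin(met_phi) + pt_old * np.sin(phi_old) - pt_new * np.sin(phi_new)
    return np.hypot(met_x, met_y), np.arctan2(met_y, met_x)
```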


@tec.init
def tec_init(self: Calibrator) -> None:
# add nominal MET columns when propagating the nominal tec
if self.propagate_met:
self.uses |= {"MET.pt", "MET.phi"}
self.produces |= {"MET.pt", "MET.phi"}

# add columns with uncertainties if requested
if self.with_uncertainties:
# also check if met propagation is enabled
src_fields = ["Tau.pt", "Tau.mass"]
if self.propagate_met:
src_fields += ["MET.pt", "MET.phi"]

self.produces |= {
f"{field}_tec_{match}_dm{dm}_{direction}"
for field, match, dm, direction in itertools.product(
src_fields,
["jet", "e"],
[0, 1, 10, 11],
["up", "down"],
)
}
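
With MET propagation enabled, the comprehension above expands to 64 shifted column names (4 fields × 2 matches × 4 decay modes × 2 directions); a standalone check:

```python
import itertools

names = [
    f"{field}_tec_{match}_dm{dm}_{direction}"
    for field, match, dm, direction in itertools.product(
        ["Tau.pt", "Tau.mass", "MET.pt", "MET.phi"],
        ["jet", "e"],
        [0, 1, 10, 11],
        ["up", "down"],
    )
]
assert len(names) == 64
assert names[0] == "Tau.pt_tec_jet_dm0_up"
```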


@tec.requires
def tec_requires(self: Calibrator, reqs: dict) -> None:
if "external_files" in reqs:
@@ -181,4 +208,4 @@ def tec_setup(self: Calibrator, reqs: dict, inputs: dict, reader_targets: Insert
assert self.tec_corrector.version in [0, 1]


tec_nominal = tec.derive("tec_nominal", cls_dict={"uncertainty_sources": []})
tec_nominal = tec.derive("tec_nominal", cls_dict={"with_uncertainties": False})
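
The nominal variant now simply disables the new class-level flag instead of emptying an `uncertainty_sources` list. Further variants could be derived the same way, for example (hypothetical, not part of this commit):

```python
# hypothetical additional variants following the same pattern
tec_no_met = tec.derive("tec_no_met", cls_dict={"propagate_met": False})
tec_bare = tec.derive("tec_bare", cls_dict={"with_uncertainties": False, "propagate_met": False})
```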
18 changes: 18 additions & 0 deletions hbt/columnflow_patches.py
@@ -5,6 +5,7 @@
"""

import os
import getpass

import law
from columnflow.util import memoize
@@ -38,6 +39,23 @@ def patch_bundle_repo_exclude_files():
logger.debug(f"patched exclude_files of {BundleRepo.task_family}")


@memoize
def patch_htcondor_workflow_naf_resources():
"""
Patches the HTCondorWorkflow task to declare user-specific resources when running on the NAF.
"""
from columnflow.tasks.framework.remote import HTCondorWorkflow

def htcondor_job_resources(self, job_num, branches):
# one "naf_<username>" resource per job, independent of the number of branches in the job
return {f"naf_{getpass.getuser()}": 1}

HTCondorWorkflow.htcondor_job_resources = htcondor_job_resources

logger.debug(f"patched htcondor_job_resources of {HTCondorWorkflow.task_family}")


@memoize
def patch_all():
patch_bundle_repo_exclude_files()
patch_htcondor_workflow_naf_resources()
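
Both patch functions are decorated with `columnflow.util.memoize`, so `patch_all` can be called repeatedly while each patch is applied at most once. A minimal sketch of that behavior (assumed semantics for argument-less patch functions, not the actual columnflow implementation):

```python
import functools

def memoize(func):
    # cache the single result; assumes the decorated function takes no
    # meaningful arguments, as is the case for the patch functions above
    sentinel = object()
    cached = sentinel

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal cached
        if cached is sentinel:
            cached = func(*args, **kwargs)
        return cached

    return wrapper
```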
79 changes: 40 additions & 39 deletions hbt/config/analysis_hbt.py
@@ -17,6 +17,7 @@
)

# analysis-global versions
# (empty since we use the lookup from the law.cfg instead)
analysis_hbt.x.versions = {}

# files of bash sandboxes that might be required by remote tasks
@@ -42,45 +43,45 @@
# load configs
#

# 2017
from hbt.config.configs_run2ul import add_config as add_config_run2ul
from cmsdb.campaigns.run2_2017_nano_v9 import campaign_run2_2017_nano_v9
from cmsdb.campaigns.run2_2017_nano_uhh_v11 import campaign_run2_2017_nano_uhh_v11


# default v9 config
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_v9.copy(),
config_name=campaign_run2_2017_nano_v9.name,
config_id=2,
)

# v9 config with limited number of files for faster prototyping
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_v9.copy(),
config_name=f"{campaign_run2_2017_nano_v9.name}_limited",
config_id=12,
limit_dataset_files=2,
)

# default v11 uhh config
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_uhh_v11.copy(),
config_name=campaign_run2_2017_nano_uhh_v11.name,
config_id=31,
)

# v11 uhh config with limited number of files for faster prototyping
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_uhh_v11.copy(),
config_name=f"{campaign_run2_2017_nano_uhh_v11.name}_limited",
config_id=32,
limit_dataset_files=2,
)
# # 2017
# from hbt.config.configs_run2ul import add_config as add_config_run2ul
# from cmsdb.campaigns.run2_2017_nano_v9 import campaign_run2_2017_nano_v9
# from cmsdb.campaigns.run2_2017_nano_uhh_v11 import campaign_run2_2017_nano_uhh_v11


# # default v9 config
# add_config_run2ul(
# analysis_hbt,
# campaign_run2_2017_nano_v9.copy(),
# config_name=campaign_run2_2017_nano_v9.name,
# config_id=2,
# )

# # v9 config with limited number of files for faster prototyping
# add_config_run2ul(
# analysis_hbt,
# campaign_run2_2017_nano_v9.copy(),
# config_name=f"{campaign_run2_2017_nano_v9.name}_limited",
# config_id=12,
# limit_dataset_files=2,
# )

# # default v11 uhh config
# add_config_run2ul(
# analysis_hbt,
# campaign_run2_2017_nano_uhh_v11.copy(),
# config_name=campaign_run2_2017_nano_uhh_v11.name,
# config_id=31,
# )

# # v11 uhh config with limited number of files for faster prototyping
# add_config_run2ul(
# analysis_hbt,
# campaign_run2_2017_nano_uhh_v11.copy(),
# config_name=f"{campaign_run2_2017_nano_uhh_v11.name}_limited",
# config_id=32,
# limit_dataset_files=2,
# )


#