Commit bd1fe5e

Merge branch 'master' of github.com:uhh-cms/hh2bbtautau into run3_master
haddadanas committed Jun 11, 2024
2 parents: 3aa7e28 + 8435c34
Showing 18 changed files with 425 additions and 851 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -37,3 +37,4 @@ data
.setups
.mypy_cache
.vscode
.python-version
675 changes: 2 additions & 673 deletions LICENSE

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion README.md
@@ -3,7 +3,6 @@
[![Lint and test](https://github.com/uhh-cms/hh2bbtautau/actions/workflows/lint_and_test.yaml/badge.svg)](https://github.com/uhh-cms/hh2bbtautau/actions/workflows/lint_and_test.yaml)
[![License](https://img.shields.io/github/license/uhh-cms/hh2bbtautau.svg)](https://github.com/uhh-cms/hh2bbtautau/blob/master/LICENSE)


### Quickstart

A couple of test tasks are listed below.
@@ -44,6 +43,12 @@ law run cf.CreateDatacards \
--workers 3
```

### Useful links

- [columnflow documentation](https://columnflow.readthedocs.io/en/latest/index.html)
- [Nano documentation](https://gitlab.cern.ch/cms-nanoAOD/nanoaod-doc)
- [Correctionlib files](https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration)
- [HLT info browser](https://cmshltinfo.app.cern.ch/path/HLT_MediumChargedIsoPFTau180HighPtRelaxedIso_Trk50_eta2p1_v)

### Development

7 changes: 3 additions & 4 deletions hbt/calibration/default.py
@@ -19,16 +19,15 @@

# derive calibrators to add settings
jec_full = jec.derive("jec_full", cls_dict={"mc_only": True, "nominal_only": True})
tec_full = tec.derive("tec_full", cls_dict={"nominal_only": True})


@calibrator(
uses={
mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec_full, deterministic_seeds,
mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec, deterministic_seeds,
IF_RUN_2(met_phi),
},
produces={
mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec_full, deterministic_seeds,
mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec, deterministic_seeds,
IF_RUN_2(met_phi),
},
)
@@ -48,7 +47,7 @@ def default(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:

if self.dataset_inst.is_mc:
if self.global_shift_inst.is_nominal:
events = self[tec_full](events, **kwargs)
events = self[tec](events, **kwargs)
else:
events = self[tec_nominal](events, **kwargs)

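
The `derive` call above creates a specialized subclass whose `cls_dict` entries override class-level attributes; the removal of `tec_full` in favor of the plain `tec` (now configurable via class-level flags, see `hbt/calibration/tau.py` below) relies on the same mechanism. A self-contained toy sketch of the pattern (illustrative only, not columnflow's actual implementation):

```python
class Derivable:
    """Toy stand-in for columnflow's derivable base class."""

    @classmethod
    def derive(cls, name, cls_dict=None):
        # create a subclass whose attributes shadow those of the parent
        return type(name, (cls,), dict(cls_dict or {}))


class ToyCalibrator(Derivable):
    # default settings, overridable per derived class
    mc_only = False
    nominal_only = False


# mirrors the jec_full derivation above
jec_full_toy = ToyCalibrator.derive("jec_full", cls_dict={"mc_only": True, "nominal_only": True})
assert jec_full_toy.mc_only and jec_full_toy.nominal_only
assert not ToyCalibrator.mc_only  # the base class is untouched
```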
151 changes: 89 additions & 62 deletions hbt/calibration/tau.py
@@ -24,19 +24,15 @@
uses={
# nano columns
"nTau", "Tau.pt", "Tau.eta", "Tau.phi", "Tau.mass", "Tau.charge", "Tau.genPartFlav",
"Tau.decayMode", "MET.pt", "MET.phi",
"Tau.decayMode",
},
produces={
"Tau.pt", "Tau.mass", "MET.pt", "MET.phi",
} | {
f"{field}_tec_{match}_dm{dm}_{direction}"
for field, match, dm, direction in itertools.product(
["Tau.pt", "Tau.mass", "MET.pt", "MET.phi"],
["jet", "e"],
[0, 1, 10, 11],
["up", "down"],
)
"Tau.pt", "Tau.mass",
},
# whether to also produce uncertainties
with_uncertainties=True,
# toggle for propagation to MET
propagate_met=True,
# only run on mc
mc_only=True,
# function to determine the correction file
@@ -76,16 +72,20 @@ def tec(

# get the scale factors for the four supported decay modes
dm_mask = (dm == 0) | (dm == 1) | (dm == 10) | (dm == 11)
scales_nom = np.ones_like(dm_mask, dtype=np.float32)
scales_up = np.ones_like(dm_mask, dtype=np.float32)
scales_down = np.ones_like(dm_mask, dtype=np.float32)

# prepare arguments for the correction tool
args = (pt[dm_mask], eta[dm_mask], dm[dm_mask], match[dm_mask], self.config_inst.x.tau_tagger)
if self.config_inst.campaign.x.run == 3:
args += self.config_inst.x.tau_energy_calibration

scales_nom = np.ones_like(dm_mask, dtype=np.float32)
scales_nom[dm_mask] = self.tec_corrector(*args, "nom")
scales_up[dm_mask] = self.tec_corrector(*args, "up")
scales_down[dm_mask] = self.tec_corrector(*args, "down")

if self.with_uncertainties:
scales_up = np.ones_like(dm_mask, dtype=np.float32)
scales_up[dm_mask] = self.tec_corrector(*args, "up")
scales_down = np.ones_like(dm_mask, dtype=np.float32)
scales_down[dm_mask] = self.tec_corrector(*args, "down")

# custom adjustment 1: reset where the matching value is unhandled
# custom adjustment 2: reset electrons faking taus where the pt is too small
@@ -95,42 +95,45 @@
# apply reset masks
mask = mask1 | mask2
scales_nom[mask] = 1.0
scales_up[mask] = 1.0
scales_down[mask] = 1.0
if self.with_uncertainties:
scales_up[mask] = 1.0
scales_down[mask] = 1.0

# create varied collections per decay mode
for (match_mask, match_name), _dm, (direction, scales) in itertools.product(
[(match == 5, "jet"), ((match == 1) | (match == 3), "e")],
[0, 1, 10, 11],
[("up", scales_up), ("down", scales_down)],
):
# copy pt and mass
pt_varied = ak_copy(events.Tau.pt)
mass_varied = ak_copy(events.Tau.mass)
pt_view = flat_np_view(pt_varied, axis=1)
mass_view = flat_np_view(mass_varied, axis=1)

# correct pt and mass for taus with that gen match and decay mode
mask = match_mask & (dm == _dm)
pt_view[mask] *= scales[mask]
mass_view[mask] *= scales[mask]

# propagate changes to MET
met_pt_varied, met_phi_varied = propagate_met(
events.Tau.pt,
events.Tau.phi,
pt_varied,
events.Tau.phi,
events.MET.pt,
events.MET.phi,
)

# save columns
postfix = f"tec_{match_name}_dm{_dm}_{direction}"
events = set_ak_column_f32(events, f"Tau.pt_{postfix}", pt_varied)
events = set_ak_column_f32(events, f"Tau.mass_{postfix}", mass_varied)
events = set_ak_column_f32(events, f"MET.pt_{postfix}", met_pt_varied)
events = set_ak_column_f32(events, f"MET.phi_{postfix}", met_phi_varied)
if self.with_uncertainties:
for (match_mask, match_name), _dm, (direction, scales) in itertools.product(
[(match == 5, "jet"), ((match == 1) | (match == 3), "e")],
[0, 1, 10, 11],
[("up", scales_up), ("down", scales_down)],
):
# copy pt and mass
pt_varied = ak_copy(events.Tau.pt)
mass_varied = ak_copy(events.Tau.mass)
pt_view = flat_np_view(pt_varied, axis=1)
mass_view = flat_np_view(mass_varied, axis=1)

# correct pt and mass for taus with that gen match and decay mode
mask = match_mask & (dm == _dm)
pt_view[mask] *= scales[mask]
mass_view[mask] *= scales[mask]

# save columns
postfix = f"tec_{match_name}_dm{_dm}_{direction}"
events = set_ak_column_f32(events, f"Tau.pt_{postfix}", pt_varied)
events = set_ak_column_f32(events, f"Tau.mass_{postfix}", mass_varied)

# propagate changes to MET
if self.propagate_met:
met_pt_varied, met_phi_varied = propagate_met(
events.Tau.pt,
events.Tau.phi,
pt_varied,
events.Tau.phi,
events.MET.pt,
events.MET.phi,
)
events = set_ak_column_f32(events, f"MET.pt_{postfix}", met_pt_varied)
events = set_ak_column_f32(events, f"MET.phi_{postfix}", met_phi_varied)

# apply the nominal correction
# note: changes are applied to the views and directly propagate to the original ak arrays
@@ -140,22 +143,46 @@ def tec(
mass *= scales_nom

# propagate changes to MET
met_pt, met_phi = propagate_met(
tau_sum_before.pt,
tau_sum_before.phi,
events.Tau.pt,
events.Tau.phi,
events.MET.pt,
events.MET.phi,
)

# save columns
events = set_ak_column_f32(events, "MET.pt", met_pt)
events = set_ak_column_f32(events, "MET.phi", met_phi)
if self.propagate_met:
met_pt, met_phi = propagate_met(
tau_sum_before.pt,
tau_sum_before.phi,
events.Tau.pt,
events.Tau.phi,
events.MET.pt,
events.MET.phi,
)
events = set_ak_column_f32(events, "MET.pt", met_pt)
events = set_ak_column_f32(events, "MET.phi", met_phi)

return events
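
Conceptually, this MET propagation replaces the pre-calibration tau transverse momentum sum by the calibrated one. A minimal numpy sketch of that balance (a hypothetical helper illustrating the assumed behavior, not the actual `propagate_met` implementation):

```python
import numpy as np

def propagate_met_sketch(pt_old, phi_old, pt_new, phi_new, met_pt, met_phi):
    # MET shifts by the negative of the net change of the calibrated objects:
    # add the old transverse vector sum back, then subtract the new one
    met_x = met_pt * np.cos(met_phi) + pt_old * np.cos(phi_old) - pt_new * np.cos(phi_new)
    met_y = met_pt * np.sin(met_phi) + pt_old * np.sin(phi_old) - pt_new * np.sin(phi_new)
    return np.hypot(met_x, met_y), np.arctan2(met_y, met_x)
```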


@tec.init
def tec_init(self: Calibrator) -> None:
# add nominal MET columns when propagating the nominal tec
if self.propagate_met:
self.uses |= {"MET.pt", "MET.phi"}
self.produces |= {"MET.pt", "MET.phi"}

# add columns with uncertainties if requested
if self.with_uncertainties:
# also check if met propagation is enabled
src_fields = ["Tau.pt", "Tau.mass"]
if self.propagate_met:
src_fields += ["MET.pt", "MET.phi"]

self.produces |= {
f"{field}_tec_{match}_dm{dm}_{direction}"
for field, match, dm, direction in itertools.product(
src_fields,
["jet", "e"],
[0, 1, 10, 11],
["up", "down"],
)
}
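
With MET propagation enabled, the comprehension above expands to 64 shifted column names (4 fields × 2 matches × 4 decay modes × 2 directions); a standalone check:

```python
import itertools

names = [
    f"{field}_tec_{match}_dm{dm}_{direction}"
    for field, match, dm, direction in itertools.product(
        ["Tau.pt", "Tau.mass", "MET.pt", "MET.phi"],
        ["jet", "e"],
        [0, 1, 10, 11],
        ["up", "down"],
    )
]
assert len(names) == 64
assert names[0] == "Tau.pt_tec_jet_dm0_up"
```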


@tec.requires
def tec_requires(self: Calibrator, reqs: dict) -> None:
if "external_files" in reqs:
@@ -181,4 +208,4 @@ def tec_setup(self: Calibrator, reqs: dict, inputs: dict, reader_targets: Insert
assert self.tec_corrector.version in [0, 1]


tec_nominal = tec.derive("tec_nominal", cls_dict={"uncertainty_sources": []})
tec_nominal = tec.derive("tec_nominal", cls_dict={"with_uncertainties": False})
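
The nominal variant now simply disables the new class-level flag instead of emptying an `uncertainty_sources` list. Further variants could be derived the same way, for example (hypothetical, not part of this commit):

```python
# hypothetical additional variants following the same pattern
tec_no_met = tec.derive("tec_no_met", cls_dict={"propagate_met": False})
tec_bare = tec.derive("tec_bare", cls_dict={"with_uncertainties": False, "propagate_met": False})
```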
18 changes: 18 additions & 0 deletions hbt/columnflow_patches.py
@@ -5,6 +5,7 @@
"""

import os
import getpass

import law
from columnflow.util import memoize
@@ -38,6 +39,23 @@ def patch_bundle_repo_exclude_files():
logger.debug(f"patched exclude_files of {BundleRepo.task_family}")


@memoize
def patch_htcondor_workflow_naf_resources():
"""
Patches the HTCondorWorkflow task to declare user-specific resources when running on the NAF.
"""
from columnflow.tasks.framework.remote import HTCondorWorkflow

def htcondor_job_resources(self, job_num, branches):
# one "naf_<username>" resource per job, independent of the number of branches in the job
return {f"naf_{getpass.getuser()}": 1}

HTCondorWorkflow.htcondor_job_resources = htcondor_job_resources

logger.debug(f"patched htcondor_job_resources of {HTCondorWorkflow.task_family}")


@memoize
def patch_all():
patch_bundle_repo_exclude_files()
patch_htcondor_workflow_naf_resources()
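
Both patch functions are decorated with `columnflow.util.memoize`, so `patch_all` can be called repeatedly while each patch is applied at most once. A minimal sketch of that behavior (assumed semantics for argument-less patch functions, not the actual columnflow implementation):

```python
import functools

def memoize(func):
    # cache the single result; assumes the decorated function takes no
    # meaningful arguments, as is the case for the patch functions above
    sentinel = object()
    cached = sentinel

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal cached
        if cached is sentinel:
            cached = func(*args, **kwargs)
        return cached

    return wrapper
```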
79 changes: 40 additions & 39 deletions hbt/config/analysis_hbt.py
@@ -17,6 +17,7 @@
)

# analysis-global versions
# (empty since we use the lookup from the law.cfg instead)
analysis_hbt.x.versions = {}

# files of bash sandboxes that might be required by remote tasks
@@ -42,45 +43,45 @@
# load configs
#

# 2017
from hbt.config.configs_run2ul import add_config as add_config_run2ul
from cmsdb.campaigns.run2_2017_nano_v9 import campaign_run2_2017_nano_v9
from cmsdb.campaigns.run2_2017_nano_uhh_v11 import campaign_run2_2017_nano_uhh_v11


# default v9 config
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_v9.copy(),
config_name=campaign_run2_2017_nano_v9.name,
config_id=2,
)

# v9 config with limited number of files for faster prototyping
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_v9.copy(),
config_name=f"{campaign_run2_2017_nano_v9.name}_limited",
config_id=12,
limit_dataset_files=2,
)

# default v11 uhh config
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_uhh_v11.copy(),
config_name=campaign_run2_2017_nano_uhh_v11.name,
config_id=31,
)

# v11 uhh config with limited number of files for faster prototyping
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_uhh_v11.copy(),
config_name=f"{campaign_run2_2017_nano_uhh_v11.name}_limited",
config_id=32,
limit_dataset_files=2,
)
# # 2017
# from hbt.config.configs_run2ul import add_config as add_config_run2ul
# from cmsdb.campaigns.run2_2017_nano_v9 import campaign_run2_2017_nano_v9
# from cmsdb.campaigns.run2_2017_nano_uhh_v11 import campaign_run2_2017_nano_uhh_v11


# # default v9 config
# add_config_run2ul(
# analysis_hbt,
# campaign_run2_2017_nano_v9.copy(),
# config_name=campaign_run2_2017_nano_v9.name,
# config_id=2,
# )

# # v9 config with limited number of files for faster prototyping
# add_config_run2ul(
# analysis_hbt,
# campaign_run2_2017_nano_v9.copy(),
# config_name=f"{campaign_run2_2017_nano_v9.name}_limited",
# config_id=12,
# limit_dataset_files=2,
# )

# # default v11 uhh config
# add_config_run2ul(
# analysis_hbt,
# campaign_run2_2017_nano_uhh_v11.copy(),
# config_name=campaign_run2_2017_nano_uhh_v11.name,
# config_id=31,
# )

# # v11 uhh config with limited number of files for faster prototyping
# add_config_run2ul(
# analysis_hbt,
# campaign_run2_2017_nano_uhh_v11.copy(),
# config_name=f"{campaign_run2_2017_nano_uhh_v11.name}_limited",
# config_id=32,
# limit_dataset_files=2,
# )


#