Skip to content

Commit

Permalink
adjusted the selection
Browse files Browse the repository at this point in the history
  • Loading branch information
haddadanas committed Nov 26, 2024
1 parent d6c9e7f commit 6378b41
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 23 deletions.
31 changes: 18 additions & 13 deletions columnflow/tasks/histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"""

from __future__ import annotations
from functools import reduce
from operator import and_

import luigi
import law
Expand Down Expand Up @@ -226,18 +228,29 @@ def run(self):
# enable weights and store it
histograms[var_key] = h.Weight()

# create event mask
masks = []
for variable_inst in variable_insts:
sel = variable_inst.selection
if sel != "1":
if not callable(sel):
raise ValueError(f"invalid selection '{sel}', for now only callables are supported")
masks.append(sel(events))
mask = reduce(and_, masks, np.ones(len(events), dtype=bool))
masked_events = events[mask]

# merge category ids
category_ids = ak.concatenate(
[Route(c).apply(events) for c in self.category_id_columns],
[Route(c).apply(masked_events) for c in self.category_id_columns],
axis=-1,
)

# broadcast arrays so that each event can be filled for all its categories
fill_data = {
"category": category_ids,
"process": events.process_id,
"shift": np.ones(len(events), dtype=np.int32) * self.global_shift_inst.id,
"weight": weight,
"process": masked_events.process_id,
"shift": np.ones(len(masked_events), dtype=np.int32) * self.global_shift_inst.id,
"weight": weight[mask],
}
for variable_inst in variable_insts:
# prepare the expression
Expand All @@ -248,15 +261,7 @@ def expr(events, *args, **kwargs):
if len(events) == 0 and not has_ak_column(events, route):
return empty_f32
return route.apply(events, null_value=variable_inst.null_value)
arr = expr(events)
# prepare the selection
sel = variable_inst.selection
if sel != "1":
if not callable(sel):
raise ValueError(f"invalid selection '{sel}', for now only callables are supported")
mask = sel(events)
arr = arr[mask]
print(f"selection {variable_inst.name} applied")
arr = expr(masked_events)
# apply it
fill_data[variable_inst.name] = arr

Expand Down
26 changes: 16 additions & 10 deletions docs/user_guide/plotting.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,39 +373,45 @@ This may look as follows:

```python
config.add_variable(
name="jet_pt",
expression="Jet.pt",
binning=(50, 0, 300.0),
selection=(lambda events: events.Jet.mass > 30.0), # Select only jets with a mass larger than 30 GeV
name="hh_mass",
expression="hh.mass",
binning=(20, 250, 750.0),
selection=(lambda events: events.hh.pt > 30.0), # Select only events with a pt larger than 30 GeV
null_value=EMPTY_FLOAT, # Set the value of the variable to EMPTY_FLOAT if the selection is not passed
unit="GeV",
x_title=r"all Jet $p_{T}$",
aux={"inputs": ["Jet.mass"]}, # Add the needed selection columns to the auxiliary of the variable instance
x_title=r"$m_{hh}$",
aux={"inputs": ["hh.pt"]}, # Add the needed selection columns to the auxiliary of the variable instance
)
```

When using the `selection` parameter, it is important to also provide the `null_value` parameter, as the variable will be set to this value if the selection is not passed.
The `selection` parameter only supports functions / lambda expressions for now.
The function itself can be as complex as needed, but its signature needs to match `def my_selection(events: ak.Array) -> ak.Array[bool]`, where the events array is passed to the function and the returned value is a boolean array of the same length as the input array.
The returned array is expected to be a one-dimensional mask that is applied at event level.

The columns used in the selection function are not automatically added to the required routes of the workflow.
For this reason, it is necessary to add them to the auxiliary data (`aux`) of the variable instance under the `inputs` key, and to make sure that these columns have been produced by the time the histograms are created.

:::{dropdown} Another example with a more complex selection:
:::{dropdown} Another example with a more complex selection at event level:

```python
def jet_selection(events):
    """
    Build an event-level mask that keeps events in which the summed pt of all
    jets with eta < 2.1 exceeds 200 GeV.
    """
    import awkward as ak

    # per-event pt values of the jets passing the eta requirement
    jets = events.Jet
    selected_pt = jets.pt[jets.eta < 2.1]

    # reduce over the jet axis so that one boolean per event remains
    return ak.sum(selected_pt, axis=-1) > 200

config.add_variable(
name="jet_pt",
expression="Jet.pt",
binning=(50, 0, 300.0),
selection=(lambda events: abs(events.Jet.eta) ** 2 + abs(events.Jet.phi) ** 2 < 0.4),
selection=jet_selection,
null_value=EMPTY_FLOAT,
unit="GeV",
x_title=r"all Jet $p_{T}$",
aux={"inputs": ["Jet.eta", "Jet.phi"]},
aux={"inputs": ["Jet.pt", "Jet.eta"]},
)

```

:::

0 comments on commit 6378b41

Please sign in to comment.