diff --git a/columnflow/tasks/histograms.py b/columnflow/tasks/histograms.py index bf7431305..55d78d610 100644 --- a/columnflow/tasks/histograms.py +++ b/columnflow/tasks/histograms.py @@ -5,6 +5,8 @@ """ from __future__ import annotations +from functools import reduce +from operator import and_ import luigi import law @@ -226,18 +228,29 @@ def run(self): # enable weights and store it histograms[var_key] = h.Weight() + # create event mask + masks = [] + for variable_inst in variable_insts: + sel = variable_inst.selection + if sel != "1": + if not callable(sel): + raise ValueError(f"invalid selection '{sel}', for now only callables are supported") + masks.append(sel(events)) + mask = reduce(and_, masks, np.ones(len(events), dtype=bool)) + masked_events = events[mask] + # merge category ids category_ids = ak.concatenate( - [Route(c).apply(events) for c in self.category_id_columns], + [Route(c).apply(masked_events) for c in self.category_id_columns], axis=-1, ) # broadcast arrays so that each event can be filled for all its categories fill_data = { "category": category_ids, - "process": events.process_id, - "shift": np.ones(len(events), dtype=np.int32) * self.global_shift_inst.id, - "weight": weight, + "process": masked_events.process_id, + "shift": np.ones(len(masked_events), dtype=np.int32) * self.global_shift_inst.id, + "weight": weight[mask], } for variable_inst in variable_insts: # prepare the expression @@ -248,15 +261,7 @@ def expr(events, *args, **kwargs): if len(events) == 0 and not has_ak_column(events, route): return empty_f32 return route.apply(events, null_value=variable_inst.null_value) - arr = expr(events) - # prepare the selection - sel = variable_inst.selection - if sel != "1": - if not callable(sel): - raise ValueError(f"invalid selection '{sel}', for now only callables are supported") - mask = sel(events) - arr = arr[mask] - print(f"selection {variable_inst.name} applied") + arr = expr(masked_events) # apply it fill_data[variable_inst.name] = arr 
diff --git a/docs/user_guide/plotting.md b/docs/user_guide/plotting.md index e836674a2..d6cda37de 100644 --- a/docs/user_guide/plotting.md +++ b/docs/user_guide/plotting.md @@ -373,39 +373,45 @@ This may look as follows: ```python config.add_variable( - name="jet_pt", - expression="Jet.pt", - binning=(50, 0, 300.0), - selection=(lambda events: events.Jet.mass > 30.0), # Select only jets with a mass larger than 30 GeV + name="hh_mass", + expression="hh.mass", + binning=(20, 250, 750.0), + selection=(lambda events: events.hh.pt > 30.0), # Select only events with a pt larger than 30 GeV null_value=EMPTY_FLOAT, # Set the value of the variable to EMPTY_FLOAT if the selection is not passed unit="GeV", - x_title=r"all Jet $p_{T}$", - aux={"inputs": ["Jet.mass"]}, # Add the needed selection columns to the auxiliary of the variable instance + x_title=r"$m_{hh}$", + aux={"inputs": ["hh.pt"]}, # Add the needed selection columns to the auxiliary of the variable instance ) ``` It is important to provide the `null_value` parameter, when using the `selection` parameter, as the variable will be set to this value if the selection is not passed. The `selection` parameter only supports functions / lambda expressions for now. The function itself can be as complex as needed, but its signature needs to match `def my_selection(events: ak.Array) -> ak.Array[bool]` where the variable array is passed to the function and the returned value is a boolean array of the same length as the input array. +The returned array is supposed to be a one-dimensional mask applied at event level. The used columns in the selection function are not automatically added to the required routes of the workflow. For this reason, it is necessary to add the columns used in the selection function to variable instance auxiliary and to make sure that the columns are produced at the time of creating the histograms.
-:::{dropdown} An other examble with a more complex selection: +:::{dropdown} Another example with a more complex selection at event level: ```python +def jet_selection(events): + """select events where the sum of pt of jets with eta < 2.1 is greater than 200 GeV""" + import awkward as ak + eta_mask = events.Jet.eta < 2.1 + mask = (ak.sum(events.Jet.pt[eta_mask], axis=-1) > 200) + return mask config.add_variable( name="jet_pt", expression="Jet.pt", binning=(50, 0, 300.0), - selection=(lambda events: abs(events.Jet.eta) ** 2 + abs(events.Jet.phi) ** 2 < 0.4), + selection=jet_selection, null_value=EMPTY_FLOAT, unit="GeV", x_title=r"all Jet $p_{T}$", - aux={"inputs": ["Jet.eta", "Jet.phi"]}, + aux={"inputs": ["Jet.pt", "Jet.eta"]}, ) - ``` :::