Skip to content

Commit

Permalink
More versatile generation surfaces (#25)
Browse files Browse the repository at this point in the history
* More versatile generation surfaces

Before this commit the generation surface was assumed to have 3
components: an event_weight a spatial pdf and an energy pdf. This mostly
worked but had some problems. There was weirdness caused by the fact
that genie doesn't store the zenith angle so it had to be faked.
This is also hindering future developement for more types of simulation.
This commit changes it so that generation surface is now an arbitrary
length sequence of pdfs, such that any amount of pdfs can be used with
any name columns.

* fix illegal instruction in tables on macos

* fix version of pytable
  • Loading branch information
kjmeagher authored Dec 5, 2023
1 parent 82ec3ae commit 1c3e313
Show file tree
Hide file tree
Showing 16 changed files with 260 additions and 132 deletions.
9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = ["numpy>=1.21.2", "scipy"]
dynamic = ["version", "description"]

[project.optional-dependencies]
test = ["h5py", "tables < 3.8; python_version < '3.9'", "tables", "pandas", "uproot", "pytest-cov"]
test = ["h5py", "tables < 3.8; python_version < '3.9'", "tables <= 3.9.1", "pandas", "uproot", "pytest-cov"]
docs = ["sphinx","sphinx-rtd-theme","pandas"]
dev = ["pytest","pre-commit","reuse","black","ruff","pylint","mypy"]

Expand Down Expand Up @@ -74,7 +74,7 @@ enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
max-line-length = "128"

[tool.pylint.messages_control]
disable = "C0114"
disable = "C0114,R0902,R0913"

[tool.ruff]
select = ["ALL"]
Expand All @@ -83,8 +83,9 @@ ignore = [
"ANN401", # any-type
"FBT", # flake8-boolean-trap
"S101", # assert-used
"COM812", #confilcts with ruff formatter
"ISC001", #confilcts with ruff formatter
"COM812", # confilcts with ruff formatter
"ISC001", # confilcts with ruff formatter
"PLR0913",# Too many arguments in function definition
]
line-length = 128
target-version = "py38"
Expand Down
9 changes: 7 additions & 2 deletions src/simweights/_corsika_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ._generation_surface import GenerationSurface, generation_surface
from ._powerlaw import PowerLaw
from ._spatial import NaturalRateCylinder
from ._utils import constcol, get_column, get_table, has_table
from ._utils import Column, constcol, get_column, get_table, has_table
from ._weighter import Weighter


Expand All @@ -32,17 +32,20 @@ def sframe_corsika_surface(table: Any, oversampling: bool) -> GenerationSurface:
get_column(table, "cylinder_radius")[i],
np.cos(get_column(table, "max_zenith")[i]),
np.cos(get_column(table, "min_zenith")[i]),
"cos_zen",
)
spectrum = PowerLaw(
get_column(table, "power_law_index")[i],
get_column(table, "min_energy")[i],
get_column(table, "max_energy")[i],
"energy",
)
oversampling_val = get_column(table, "oversampling")[i] if oversampling else 1
pdgid = int(get_column(table, "primary_type")[i])
surfaces.append(
get_column(table, "n_events")[i]
* oversampling_val
* generation_surface(int(get_column(table, "primary_type")[i]), spectrum, spatial),
* generation_surface(pdgid, Column("event_weight"), spectrum, spatial),
)
retval = sum(surfaces)
assert isinstance(retval, GenerationSurface)
Expand All @@ -61,6 +64,7 @@ def weight_map_corsika_surface(table: Any) -> GenerationSurface:
constcol(table, "CylinderRadius", mask),
np.cos(constcol(table, "ThetaMax", mask)),
np.cos(constcol(table, "ThetaMin", mask)),
"cos_zen",
)

primary_spectral_index = round(constcol(table, "PrimarySpectralIndex", mask), 6)
Expand All @@ -70,6 +74,7 @@ def weight_map_corsika_surface(table: Any) -> GenerationSurface:
primary_spectral_index,
constcol(table, "EnergyPrimaryMin", mask),
constcol(table, "EnergyPrimaryMax", mask),
"energy",
)
nevents = constcol(table, "OverSampling", mask) * constcol(table, "NEvents", mask)
surface += nevents * generation_surface(pdgid, spectrum, spatial)
Expand Down
80 changes: 48 additions & 32 deletions src/simweights/_generation_surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,28 @@
from __future__ import annotations

from copy import deepcopy
from typing import TYPE_CHECKING, NamedTuple
from typing import TYPE_CHECKING, NamedTuple, Sequence

import numpy as np

from ._pdgcode import PDGCode
from ._powerlaw import PowerLaw
from ._spatial import CircleInjector, CylinderBase, SpatialDist

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from numpy.typing import ArrayLike, NDArray

from ._powerlaw import PowerLaw # pragma: no cover
from ._spatial import SpatialDist # pragma: no cover
from ._utils import Column, Const

Dist = SpatialDist | PowerLaw | Column | Const


class SurfaceTuple(NamedTuple):
"""Container for the components Generation Surfaces."""

pdgid: int | PDGCode
nevents: float
energy_dist: PowerLaw
spatial_dist: SpatialDist
dists: Sequence


class GenerationSurface:
Expand All @@ -46,7 +48,7 @@ def _insert(self: GenerationSurface, surface: SurfaceTuple) -> None:
self.spectra[key] = []

for i, spec in enumerate(self.spectra[key]):
if surface.energy_dist == spec.energy_dist and surface.spatial_dist == spec.spatial_dist:
if surface.dists == spec.dists:
self.spectra[key][i] = spec._replace(nevents=surface.nevents + spec.nevents)
break
else:
Expand Down Expand Up @@ -77,30 +79,45 @@ def __mul__(self: GenerationSurface, factor: float) -> GenerationSurface:
def __rmul__(self: GenerationSurface, factor: float) -> GenerationSurface:
return self.__mul__(factor)

def get_keys(self: GenerationSurface) -> list[str]:
"""Get a list of the available keys needed for weighting this surface."""
keys = []
for x in self.spectra.values():
for y in x:
for a in y.dists:
keys += list(a.columns)
return list(set(keys))

def get_epdf(
self: GenerationSurface,
pdgid: ArrayLike,
energy: ArrayLike,
cos_zen: ArrayLike,
**kwargs: ArrayLike,
) -> NDArray[np.float64]:
"""Get the extended pdf of an event.
The pdf is the probability that an event with these parameters is generated. The pdf is multiplied
by the number of events.
"""
energy = np.asarray(energy)
cos_zen = np.asarray(cos_zen)
count = np.zeros_like(energy, dtype=float)

cols = {}
shape = None
for key, value in kwargs.items():
cols[key] = np.asfarray(value)
if shape is None:
shape = cols[key].shape
else:
assert shape == cols[key].shape # type: ignore[unreachable]
assert shape is not None
count = np.zeros(shape, dtype=np.float_)
# loop over particle type
for ptype in np.unique(pdgid):
mask = ptype == pdgid
if np.any(mask):
masked_energy = energy[mask]
masked_cos_zen = cos_zen[mask]
count[mask] += sum(
p.nevents * p.energy_dist.pdf(masked_energy) * p.spatial_dist.pdf(masked_cos_zen)
for p in self.spectra[ptype]
)
# loop over different datasets of the same particle type
for surface in self.spectra[ptype]:
result = surface.nevents
# loop over the different distributions in the generation surface
for dist in surface.dists:
result *= dist.pdf(*(cols[k][mask] for k in dist.columns))
count[mask] += result
return count

def get_pdgids(self: GenerationSurface) -> list[int | PDGCode]:
Expand All @@ -116,8 +133,10 @@ def get_energy_range(self: GenerationSurface, pdgid: PDGCode | None) -> tuple[fl
emax = -np.inf
for pid in pdgids:
for surf in self.spectra[pid]:
emin = min(emin, surf.energy_dist.a)
emax = max(emax, surf.energy_dist.b)
for dist in surf.dists:
if isinstance(dist, PowerLaw):
emin = min(emin, dist.a)
emax = max(emax, dist.b)
assert np.isfinite(emin)
assert np.isfinite(emax)
return emin, emax
Expand All @@ -131,8 +150,10 @@ def get_cos_zenith_range(self: GenerationSurface, pdgid: PDGCode | None) -> tupl
czmax = -np.inf
for pid in pdgids:
for surf in self.spectra[pid]:
czmin = min(czmin, surf.spatial_dist.cos_zen_min)
czmax = max(czmax, surf.spatial_dist.cos_zen_max)
for dist in surf.dists:
if isinstance(dist, (CircleInjector, CylinderBase)):
czmin = min(czmin, dist.cos_zen_min)
czmax = max(czmax, dist.cos_zen_max)
assert np.isfinite(czmin)
assert np.isfinite(czmax)
return czmin, czmax
Expand Down Expand Up @@ -164,18 +185,13 @@ def __str__(self: GenerationSurface) -> str:
ptype = PDGCode(pdgid).name
except ValueError:
ptype = str(pdgid)

collections = [f"N={subspec.nevents} {subspec.energy_dist} {subspec.spatial_dist}" for subspec in specs]
collections = (f"N={subspec.nevents} " + " ".join(repr(d) for d in subspec.dists) for subspec in specs)
outstrs.append(f" {ptype:>11} : " + "\n ".join(collections))
return "< " + self.__class__.__name__ + "\n" + "\n".join(outstrs) + "\n>"


def generation_surface(
pdgid: int | PDGCode,
energy_dist: PowerLaw,
spatial_dist: SpatialDist,
) -> GenerationSurface:
def generation_surface(pdgid: int | PDGCode, *dists: Dist) -> GenerationSurface:
"""Convenience function to generate a GenerationSurface for a single particle type."""
return GenerationSurface(
SurfaceTuple(pdgid=pdgid, nevents=1.0, energy_dist=energy_dist, spatial_dist=spatial_dist),
SurfaceTuple(pdgid=pdgid, nevents=1.0, dists=dists),
)
19 changes: 9 additions & 10 deletions src/simweights/_genie_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ._generation_surface import GenerationSurface, generation_surface
from ._powerlaw import PowerLaw
from ._spatial import CircleInjector
from ._utils import constcol, get_column, get_table
from ._utils import Column, Const, get_column, get_table
from ._weighter import Weighter


Expand All @@ -29,10 +29,14 @@ def genie_surface(table: Iterable[Mapping[str, float]]) -> GenerationSurface:
-get_column(table, "power_law_index")[i],
get_column(table, "min_energy")[i],
get_column(table, "max_energy")[i],
"energy",
)
pdgid = int(get_column(table, "primary_type")[i])
nevents = get_column(table, "n_flux_events")[i]
global_probability_scale = get_column(table, "global_probability_scale")[i]
surfaces.append(
get_column(table, "n_flux_events")[i]
* generation_surface(int(get_column(table, "primary_type")[i]), spectrum, spatial),
nevents
* generation_surface(pdgid, Const(1 / spatial.etendue / global_probability_scale), Column("wght"), spectrum),
)
retval = sum(surfaces)
assert isinstance(retval, GenerationSurface)
Expand All @@ -47,15 +51,10 @@ def GenieWeighter(file_obj: Any) -> Weighter: # noqa: N802
"""
weight_table = get_table(file_obj, "I3GenieInfo")
surface = genie_surface(weight_table)
global_probability_scale = constcol(weight_table, "global_probability_scale")

weighter = Weighter([file_obj], surface)
weighter.add_weight_column("energy", weighter.get_column("I3GenieResult", "Ev"))
weighter.add_weight_column("pdgid", weighter.get_column("I3GenieResult", "neu").astype(np.int32))
weighter.add_weight_column("cos_zen", np.full(len(weighter.get_column("I3GenieResult", "Ev")), 1))
weighter.add_weight_column(
"event_weight",
global_probability_scale * weighter.get_column("I3GenieResult", "wght"),
)
weighter.add_weight_column("energy", weighter.get_column("I3GenieResult", "Ev"))
weighter.add_weight_column("wght", weighter.get_column("I3GenieResult", "wght"))

return weighter
12 changes: 6 additions & 6 deletions src/simweights/_icetop_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,22 @@ def sframe_icetop_surface(table: Any) -> GenerationSurface:

for i in range(len(get_column(table, "n_events"))):
assert get_column(table, "power_law_index")[i] <= 0
spectrum = PowerLaw(
spectrum = PowerLaw( # pylint: disable=duplicate-code
get_column(table, "power_law_index")[i],
get_column(table, "min_energy")[i],
get_column(table, "max_energy")[i],
"energy",
)
spatial = NaturalRateCylinder(
0, # set cylinder height to 0 to get simple surface plane
get_column(table, "sampling_radius")[i],
np.cos(get_column(table, "max_zenith")[i]),
np.cos(get_column(table, "min_zenith")[i]),
"cos_zen",
)
surfaces.append(
get_column(table, "n_events")[i] * generation_surface(int(get_column(table, "primary_type")[i]), spectrum, spatial),
)
nevents = get_column(table, "n_events")[i]
pdgid = int(get_column(table, "primary_type")[i])
surfaces.append(nevents * generation_surface(pdgid, spectrum, spatial))
retval = sum(surfaces)
assert isinstance(retval, GenerationSurface)
return retval
Expand All @@ -49,6 +51,4 @@ def IceTopWeighter(file_obj: Any) -> Weighter: # noqa: N802
weighter.add_weight_column("pdgid", pdgid)
weighter.add_weight_column("energy", weighter.get_column("MCPrimary", "energy"))
weighter.add_weight_column("cos_zen", np.cos(weighter.get_column("MCPrimary", "zenith")))
weighter.add_weight_column("event_weight", np.ones_like(pdgid))

return weighter
12 changes: 6 additions & 6 deletions src/simweights/_nugen_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ._generation_surface import GenerationSurface, generation_surface
from ._powerlaw import PowerLaw
from ._spatial import CircleInjector, SpatialDist, UniformSolidAngleCylinder
from ._utils import constcol, get_column, get_table, has_column
from ._utils import Column, constcol, get_column, get_table, has_column
from ._weighter import Weighter


Expand All @@ -30,14 +30,14 @@ def nugen_spatial(table: Any) -> SpatialDist:
injection_radius = constcol(table, "InjectionSurfaceR") if has_column(table, "InjectionSurfaceR") else -1

if injection_radius > 0:
return CircleInjector(injection_radius, min_cos, max_cos)
return CircleInjector(injection_radius, min_cos, max_cos, "cos_zen")

# Surface mode was added in V04-01-00 but the cylinder size was hard coded, `CylinderHeight` and
# `CylinderRadius` were added after later V06-00-00. If they are not in the table then use the
# hardcoded values
cylinder_height = constcol(table, "CylinderHeight") if has_column(table, "CylinderHeight") else 1900
cylinder_radius = constcol(table, "CylinderRadius") if has_column(table, "CylinderRadius") else 950
return UniformSolidAngleCylinder(cylinder_height, cylinder_radius, min_cos, max_cos)
return UniformSolidAngleCylinder(cylinder_height, cylinder_radius, min_cos, max_cos, "cos_zen")


def nugen_spectrum(table: Any) -> PowerLaw:
Expand All @@ -48,7 +48,7 @@ def nugen_spectrum(table: Any) -> PowerLaw:
# for negative slopes ie +2 means E**-2 injection spectrum
power_law_index = -constcol(table, "PowerLawIndex")
assert power_law_index <= 0
return PowerLaw(power_law_index, min_energy, max_energy)
return PowerLaw(power_law_index, min_energy, max_energy, "energy")


def nugen_surface(table: Any) -> GenerationSurface:
Expand All @@ -66,9 +66,9 @@ def nugen_surface(table: Any) -> GenerationSurface:
# newer version will explicitly put the ratio in `TypeWeight` but for older version we
# assume it is 0.5
type_weight = constcol(table, "TypeWeight", mask) if has_column(table, "TypeWeight") else 0.5
primary_type = constcol(table, "PrimaryNeutrinoType", mask)
primary_type = int(constcol(table, "PrimaryNeutrinoType", mask))
n_events = type_weight * constcol(table, "NEvents", mask)
surfaces.append(n_events * generation_surface(int(primary_type), spectrum, spatial))
surfaces.append(n_events * generation_surface(primary_type, Column("event_weight"), spectrum, spatial))
ret = sum(surfaces)
assert isinstance(ret, GenerationSurface)
return ret
Expand Down
4 changes: 2 additions & 2 deletions src/simweights/_powerlaw.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class PowerLaw:

# pylint: disable=invalid-name

def __init__(self: PowerLaw, g: float, a: float, b: float) -> None:
def __init__(self: PowerLaw, g: float, a: float, b: float, colname: str | None = None) -> None:
assert b > a
self.g = float(g)
self.a = float(a)
Expand All @@ -48,8 +48,8 @@ def __init__(self: PowerLaw, g: float, a: float, b: float) -> None:
self.integral = np.log(self.b / self.a)
else:
self.integral = (self.b**self.G - self.a**self.G) / self.G

self.span = b - a
self.columns = (colname,)

def _pdf(self: PowerLaw, x: NDArray[np.float64]) -> NDArray[np.float64]:
return np.asfarray(x**self.g / self.integral)
Expand Down
Loading

0 comments on commit 1c3e313

Please sign in to comment.