Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More versatile generation surfaces #25

Merged
merged 3 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = ["numpy>=1.21.2", "scipy"]
dynamic = ["version", "description"]

[project.optional-dependencies]
test = ["h5py", "tables < 3.8; python_version < '3.9'", "tables", "pandas", "uproot", "pytest-cov"]
test = ["h5py", "tables < 3.8; python_version < '3.9'", "tables <= 3.9.1", "pandas", "uproot", "pytest-cov"]
docs = ["sphinx","sphinx-rtd-theme","pandas"]
dev = ["pytest","pre-commit","reuse","black","ruff","pylint","mypy"]

Expand Down Expand Up @@ -74,7 +74,7 @@ enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
max-line-length = "128"

[tool.pylint.messages_control]
disable = "C0114"
disable = "C0114,R0902,R0913"

[tool.ruff]
select = ["ALL"]
Expand All @@ -83,8 +83,9 @@ ignore = [
"ANN401", # any-type
"FBT", # flake8-boolean-trap
"S101", # assert-used
"COM812", #confilcts with ruff formatter
"ISC001", #confilcts with ruff formatter
"COM812", # confilcts with ruff formatter
"ISC001", # confilcts with ruff formatter
"PLR0913",# Too many arguments in function definition
]
line-length = 128
target-version = "py38"
Expand Down
9 changes: 7 additions & 2 deletions src/simweights/_corsika_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ._generation_surface import GenerationSurface, generation_surface
from ._powerlaw import PowerLaw
from ._spatial import NaturalRateCylinder
from ._utils import constcol, get_column, get_table, has_table
from ._utils import Column, constcol, get_column, get_table, has_table
from ._weighter import Weighter


Expand All @@ -32,17 +32,20 @@ def sframe_corsika_surface(table: Any, oversampling: bool) -> GenerationSurface:
get_column(table, "cylinder_radius")[i],
np.cos(get_column(table, "max_zenith")[i]),
np.cos(get_column(table, "min_zenith")[i]),
"cos_zen",
)
spectrum = PowerLaw(
get_column(table, "power_law_index")[i],
get_column(table, "min_energy")[i],
get_column(table, "max_energy")[i],
"energy",
)
oversampling_val = get_column(table, "oversampling")[i] if oversampling else 1
pdgid = int(get_column(table, "primary_type")[i])
surfaces.append(
get_column(table, "n_events")[i]
* oversampling_val
* generation_surface(int(get_column(table, "primary_type")[i]), spectrum, spatial),
* generation_surface(pdgid, Column("event_weight"), spectrum, spatial),
)
retval = sum(surfaces)
assert isinstance(retval, GenerationSurface)
Expand All @@ -61,6 +64,7 @@ def weight_map_corsika_surface(table: Any) -> GenerationSurface:
constcol(table, "CylinderRadius", mask),
np.cos(constcol(table, "ThetaMax", mask)),
np.cos(constcol(table, "ThetaMin", mask)),
"cos_zen",
)

primary_spectral_index = round(constcol(table, "PrimarySpectralIndex", mask), 6)
Expand All @@ -70,6 +74,7 @@ def weight_map_corsika_surface(table: Any) -> GenerationSurface:
primary_spectral_index,
constcol(table, "EnergyPrimaryMin", mask),
constcol(table, "EnergyPrimaryMax", mask),
"energy",
)
nevents = constcol(table, "OverSampling", mask) * constcol(table, "NEvents", mask)
surface += nevents * generation_surface(pdgid, spectrum, spatial)
Expand Down
80 changes: 48 additions & 32 deletions src/simweights/_generation_surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,28 @@
from __future__ import annotations

from copy import deepcopy
from typing import TYPE_CHECKING, NamedTuple
from typing import TYPE_CHECKING, NamedTuple, Sequence

import numpy as np

from ._pdgcode import PDGCode
from ._powerlaw import PowerLaw
from ._spatial import CircleInjector, CylinderBase, SpatialDist

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from numpy.typing import ArrayLike, NDArray

from ._powerlaw import PowerLaw # pragma: no cover
from ._spatial import SpatialDist # pragma: no cover
from ._utils import Column, Const

Dist = SpatialDist | PowerLaw | Column | Const


class SurfaceTuple(NamedTuple):
"""Container for the components Generation Surfaces."""

pdgid: int | PDGCode
nevents: float
energy_dist: PowerLaw
spatial_dist: SpatialDist
dists: Sequence


class GenerationSurface:
Expand All @@ -46,7 +48,7 @@ def _insert(self: GenerationSurface, surface: SurfaceTuple) -> None:
self.spectra[key] = []

for i, spec in enumerate(self.spectra[key]):
if surface.energy_dist == spec.energy_dist and surface.spatial_dist == spec.spatial_dist:
if surface.dists == spec.dists:
self.spectra[key][i] = spec._replace(nevents=surface.nevents + spec.nevents)
break
else:
Expand Down Expand Up @@ -77,30 +79,45 @@ def __mul__(self: GenerationSurface, factor: float) -> GenerationSurface:
def __rmul__(self: GenerationSurface, factor: float) -> GenerationSurface:
return self.__mul__(factor)

def get_keys(self: GenerationSurface) -> list[str]:
"""Get a list of the available keys needed for weighting this surface."""
keys = []
for x in self.spectra.values():
for y in x:
for a in y.dists:
keys += list(a.columns)
return list(set(keys))

def get_epdf(
self: GenerationSurface,
pdgid: ArrayLike,
energy: ArrayLike,
cos_zen: ArrayLike,
**kwargs: ArrayLike,
) -> NDArray[np.float64]:
"""Get the extended pdf of an event.

The pdf is the probability that an event with these parameters is generated. The pdf is multiplied
by the number of events.
"""
energy = np.asarray(energy)
cos_zen = np.asarray(cos_zen)
count = np.zeros_like(energy, dtype=float)

cols = {}
shape = None
for key, value in kwargs.items():
cols[key] = np.asfarray(value)
if shape is None:
shape = cols[key].shape
else:
assert shape == cols[key].shape # type: ignore[unreachable]
assert shape is not None
count = np.zeros(shape, dtype=np.float_)
# loop over particle type
for ptype in np.unique(pdgid):
mask = ptype == pdgid
if np.any(mask):
masked_energy = energy[mask]
masked_cos_zen = cos_zen[mask]
count[mask] += sum(
p.nevents * p.energy_dist.pdf(masked_energy) * p.spatial_dist.pdf(masked_cos_zen)
for p in self.spectra[ptype]
)
# loop over different datasets of the same particle type
for surface in self.spectra[ptype]:
result = surface.nevents
# loop over the different distributions in the generation surface
for dist in surface.dists:
result *= dist.pdf(*(cols[k][mask] for k in dist.columns))
count[mask] += result
return count

def get_pdgids(self: GenerationSurface) -> list[int | PDGCode]:
Expand All @@ -116,8 +133,10 @@ def get_energy_range(self: GenerationSurface, pdgid: PDGCode | None) -> tuple[fl
emax = -np.inf
for pid in pdgids:
for surf in self.spectra[pid]:
emin = min(emin, surf.energy_dist.a)
emax = max(emax, surf.energy_dist.b)
for dist in surf.dists:
if isinstance(dist, PowerLaw):
emin = min(emin, dist.a)
emax = max(emax, dist.b)
assert np.isfinite(emin)
assert np.isfinite(emax)
return emin, emax
Expand All @@ -131,8 +150,10 @@ def get_cos_zenith_range(self: GenerationSurface, pdgid: PDGCode | None) -> tupl
czmax = -np.inf
for pid in pdgids:
for surf in self.spectra[pid]:
czmin = min(czmin, surf.spatial_dist.cos_zen_min)
czmax = max(czmax, surf.spatial_dist.cos_zen_max)
for dist in surf.dists:
if isinstance(dist, (CircleInjector, CylinderBase)):
czmin = min(czmin, dist.cos_zen_min)
czmax = max(czmax, dist.cos_zen_max)
assert np.isfinite(czmin)
assert np.isfinite(czmax)
return czmin, czmax
Expand Down Expand Up @@ -164,18 +185,13 @@ def __str__(self: GenerationSurface) -> str:
ptype = PDGCode(pdgid).name
except ValueError:
ptype = str(pdgid)

collections = [f"N={subspec.nevents} {subspec.energy_dist} {subspec.spatial_dist}" for subspec in specs]
collections = (f"N={subspec.nevents} " + " ".join(repr(d) for d in subspec.dists) for subspec in specs)
outstrs.append(f" {ptype:>11} : " + "\n ".join(collections))
return "< " + self.__class__.__name__ + "\n" + "\n".join(outstrs) + "\n>"


def generation_surface(
pdgid: int | PDGCode,
energy_dist: PowerLaw,
spatial_dist: SpatialDist,
) -> GenerationSurface:
def generation_surface(pdgid: int | PDGCode, *dists: Dist) -> GenerationSurface:
"""Convenience function to generate a GenerationSurface for a single particle type."""
return GenerationSurface(
SurfaceTuple(pdgid=pdgid, nevents=1.0, energy_dist=energy_dist, spatial_dist=spatial_dist),
SurfaceTuple(pdgid=pdgid, nevents=1.0, dists=dists),
)
19 changes: 9 additions & 10 deletions src/simweights/_genie_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ._generation_surface import GenerationSurface, generation_surface
from ._powerlaw import PowerLaw
from ._spatial import CircleInjector
from ._utils import constcol, get_column, get_table
from ._utils import Column, Const, get_column, get_table
from ._weighter import Weighter


Expand All @@ -29,10 +29,14 @@ def genie_surface(table: Iterable[Mapping[str, float]]) -> GenerationSurface:
-get_column(table, "power_law_index")[i],
get_column(table, "min_energy")[i],
get_column(table, "max_energy")[i],
"energy",
)
pdgid = int(get_column(table, "primary_type")[i])
nevents = get_column(table, "n_flux_events")[i]
global_probability_scale = get_column(table, "global_probability_scale")[i]
surfaces.append(
get_column(table, "n_flux_events")[i]
* generation_surface(int(get_column(table, "primary_type")[i]), spectrum, spatial),
nevents
* generation_surface(pdgid, Const(1 / spatial.etendue / global_probability_scale), Column("wght"), spectrum),
)
retval = sum(surfaces)
assert isinstance(retval, GenerationSurface)
Expand All @@ -47,15 +51,10 @@ def GenieWeighter(file_obj: Any) -> Weighter: # noqa: N802
"""
weight_table = get_table(file_obj, "I3GenieInfo")
surface = genie_surface(weight_table)
global_probability_scale = constcol(weight_table, "global_probability_scale")

weighter = Weighter([file_obj], surface)
weighter.add_weight_column("energy", weighter.get_column("I3GenieResult", "Ev"))
weighter.add_weight_column("pdgid", weighter.get_column("I3GenieResult", "neu").astype(np.int32))
weighter.add_weight_column("cos_zen", np.full(len(weighter.get_column("I3GenieResult", "Ev")), 1))
weighter.add_weight_column(
"event_weight",
global_probability_scale * weighter.get_column("I3GenieResult", "wght"),
)
weighter.add_weight_column("energy", weighter.get_column("I3GenieResult", "Ev"))
weighter.add_weight_column("wght", weighter.get_column("I3GenieResult", "wght"))

return weighter
12 changes: 6 additions & 6 deletions src/simweights/_icetop_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,22 @@ def sframe_icetop_surface(table: Any) -> GenerationSurface:

for i in range(len(get_column(table, "n_events"))):
assert get_column(table, "power_law_index")[i] <= 0
spectrum = PowerLaw(
spectrum = PowerLaw( # pylint: disable=duplicate-code
get_column(table, "power_law_index")[i],
get_column(table, "min_energy")[i],
get_column(table, "max_energy")[i],
"energy",
)
spatial = NaturalRateCylinder(
0, # set cylinder height to 0 to get simple surface plane
get_column(table, "sampling_radius")[i],
np.cos(get_column(table, "max_zenith")[i]),
np.cos(get_column(table, "min_zenith")[i]),
"cos_zen",
)
surfaces.append(
get_column(table, "n_events")[i] * generation_surface(int(get_column(table, "primary_type")[i]), spectrum, spatial),
)
nevents = get_column(table, "n_events")[i]
pdgid = int(get_column(table, "primary_type")[i])
surfaces.append(nevents * generation_surface(pdgid, spectrum, spatial))
retval = sum(surfaces)
assert isinstance(retval, GenerationSurface)
return retval
Expand All @@ -49,6 +51,4 @@ def IceTopWeighter(file_obj: Any) -> Weighter: # noqa: N802
weighter.add_weight_column("pdgid", pdgid)
weighter.add_weight_column("energy", weighter.get_column("MCPrimary", "energy"))
weighter.add_weight_column("cos_zen", np.cos(weighter.get_column("MCPrimary", "zenith")))
weighter.add_weight_column("event_weight", np.ones_like(pdgid))

return weighter
12 changes: 6 additions & 6 deletions src/simweights/_nugen_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ._generation_surface import GenerationSurface, generation_surface
from ._powerlaw import PowerLaw
from ._spatial import CircleInjector, SpatialDist, UniformSolidAngleCylinder
from ._utils import constcol, get_column, get_table, has_column
from ._utils import Column, constcol, get_column, get_table, has_column
from ._weighter import Weighter


Expand All @@ -30,14 +30,14 @@ def nugen_spatial(table: Any) -> SpatialDist:
injection_radius = constcol(table, "InjectionSurfaceR") if has_column(table, "InjectionSurfaceR") else -1

if injection_radius > 0:
return CircleInjector(injection_radius, min_cos, max_cos)
return CircleInjector(injection_radius, min_cos, max_cos, "cos_zen")

# Surface mode was added in V04-01-00 but the cylinder size was hard coded, `CylinderHeight` and
# `CylinderRadius` were added after later V06-00-00. If they are not in the table then use the
# hardcoded values
cylinder_height = constcol(table, "CylinderHeight") if has_column(table, "CylinderHeight") else 1900
cylinder_radius = constcol(table, "CylinderRadius") if has_column(table, "CylinderRadius") else 950
return UniformSolidAngleCylinder(cylinder_height, cylinder_radius, min_cos, max_cos)
return UniformSolidAngleCylinder(cylinder_height, cylinder_radius, min_cos, max_cos, "cos_zen")


def nugen_spectrum(table: Any) -> PowerLaw:
Expand All @@ -48,7 +48,7 @@ def nugen_spectrum(table: Any) -> PowerLaw:
# for negative slopes ie +2 means E**-2 injection spectrum
power_law_index = -constcol(table, "PowerLawIndex")
assert power_law_index <= 0
return PowerLaw(power_law_index, min_energy, max_energy)
return PowerLaw(power_law_index, min_energy, max_energy, "energy")


def nugen_surface(table: Any) -> GenerationSurface:
Expand All @@ -66,9 +66,9 @@ def nugen_surface(table: Any) -> GenerationSurface:
# newer version will explicitly put the ratio in `TypeWeight` but for older version we
# assume it is 0.5
type_weight = constcol(table, "TypeWeight", mask) if has_column(table, "TypeWeight") else 0.5
primary_type = constcol(table, "PrimaryNeutrinoType", mask)
primary_type = int(constcol(table, "PrimaryNeutrinoType", mask))
n_events = type_weight * constcol(table, "NEvents", mask)
surfaces.append(n_events * generation_surface(int(primary_type), spectrum, spatial))
surfaces.append(n_events * generation_surface(primary_type, Column("event_weight"), spectrum, spatial))
ret = sum(surfaces)
assert isinstance(ret, GenerationSurface)
return ret
Expand Down
4 changes: 2 additions & 2 deletions src/simweights/_powerlaw.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class PowerLaw:

# pylint: disable=invalid-name

def __init__(self: PowerLaw, g: float, a: float, b: float) -> None:
def __init__(self: PowerLaw, g: float, a: float, b: float, colname: str | None = None) -> None:
assert b > a
self.g = float(g)
self.a = float(a)
Expand All @@ -48,8 +48,8 @@ def __init__(self: PowerLaw, g: float, a: float, b: float) -> None:
self.integral = np.log(self.b / self.a)
else:
self.integral = (self.b**self.G - self.a**self.G) / self.G

self.span = b - a
self.columns = (colname,)

def _pdf(self: PowerLaw, x: NDArray[np.float64]) -> NDArray[np.float64]:
return np.asfarray(x**self.g / self.integral)
Expand Down
Loading