From 0daab46e3f6fb577443687fb610f3eca0446e045 Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Mon, 26 Feb 2024 16:09:58 -0500 Subject: [PATCH] add benchmarks/ecal_gaps --- .gitlab-ci.yml | 1 + Snakefile | 1 + benchmarks/ecal_gaps/Snakefile | 94 +++++++++++++ benchmarks/ecal_gaps/config.yml | 26 ++++ benchmarks/ecal_gaps/ecal_gaps.org | 192 ++++++++++++++++++++++++++ benchmarks/ecal_gaps/org2py.awk | 22 +++ benchmarks/ecal_gaps/requirements.txt | 7 + 7 files changed, 343 insertions(+) create mode 100644 benchmarks/ecal_gaps/Snakefile create mode 100644 benchmarks/ecal_gaps/config.yml create mode 100644 benchmarks/ecal_gaps/ecal_gaps.org create mode 100644 benchmarks/ecal_gaps/org2py.awk create mode 100644 benchmarks/ecal_gaps/requirements.txt diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dae3f2f8..0e150857 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -129,6 +129,7 @@ get_data: include: # - local: 'benchmarks/backgrounds/config.yml' + - local: 'benchmarks/ecal_gaps/config.yml' - local: 'benchmarks/tracking_detectors/config.yml' - local: 'benchmarks/barrel_ecal/config.yml' - local: 'benchmarks/barrel_hcal/config.yml' diff --git a/Snakefile b/Snakefile index 6e4bc637..dcec820c 100644 --- a/Snakefile +++ b/Snakefile @@ -19,6 +19,7 @@ else: include: "benchmarks/backgrounds/Snakefile" include: "benchmarks/barrel_ecal/Snakefile" +include: "benchmarks/ecal_gaps/Snakefile" rule matplotlibrc: output: diff --git a/benchmarks/ecal_gaps/Snakefile b/benchmarks/ecal_gaps/Snakefile new file mode 100644 index 00000000..ed530469 --- /dev/null +++ b/benchmarks/ecal_gaps/Snakefile @@ -0,0 +1,94 @@ +import os + + +rule ecal_gaps_sim: + input: + steering_file=provider.remote(remote_path("EPIC/EVGEN/SINGLE/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.steer")), + output: + "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", + log: + "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root.log", + wildcard_constraints: + PARTICLE="e-", + ENERGY="(500MeV|5GeV|20GeV)", + PHASE_SPACE="(3to50|45to135|130to177)deg", + INDEX="\d{4}", + params: + N_EVENTS=1000 + shell: + """ +ddsim \ + --runType batch \ + --enableGun \ + --steeringFile "{input.steering_file}" \ + --random.seed 1{wildcards.INDEX} \ + --filter.tracker edep0 \ + -v WARNING \ + --numberOfEvents {params.N_EVENTS} \ + --compactFile $DETECTOR_PATH/{wildcards.DETECTOR_CONFIG}.xml \ + --outputFile {output} +""" + + +rule ecal_gaps_recon: + input: + "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", + output: + "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root", + log: + "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log", + wildcard_constraints: + INDEX="\d{4}", + shell: """ +eicrecon {input} -Ppodio:output_file={output} +""" + + +rule ecal_gaps_org2py: + input: + notebook=workflow.source_path("ecal_gaps.org"), + converter=workflow.source_path("./org2py.awk"), + output: + "ecal_gaps.py" + shell: + """ +awk -f {input.converter} {input.notebook} > {output} +""" + +DETECTOR_CONFIG=os.environ["DETECTOR_CONFIG"] + +rule ecal_gaps: + input: + matplotlibrc=".matplotlibrc", + script="ecal_gaps.py", + # TODO pass as a file list? + _=expand( + "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root", + DETECTOR_CONFIG=DETECTOR_CONFIG, + PARTICLE=["e-"], + ENERGY=["500MeV", "5GeV", "20GeV"], + PHASE_SPACE=["3to50deg", "45to135deg", "130to177deg"], + INDEX=range(1), + ), + output: + directory("results/ecal_gaps"), + threads: workflow.cores + shell: + """ +cleanup() {{ + echo Cleaning up + kill $WORKER_PID $SCHEDULER_PID +}} +trap cleanup EXIT + +PORT=$RANDOM +dask scheduler --port $PORT & +export DASK_SCHEDULER=localhost:$PORT +SCHEDULER_PID=$! +dask worker tcp://$DASK_SCHEDULER --nworkers {threads} --nthreads 1 & +WORKER_PID=$! +env \ +MATPLOTLIBRC={input.matplotlibrc} \ +OUTPUT_DIR={output} \ +python {input.script} +""" diff --git a/benchmarks/ecal_gaps/config.yml b/benchmarks/ecal_gaps/config.yml new file mode 100644 index 00000000..14e7ee35 --- /dev/null +++ b/benchmarks/ecal_gaps/config.yml @@ -0,0 +1,26 @@ +sim:ecal_gaps: + extends: .det_benchmark + stage: simulate + script: + - mkdir $LOCAL_DATA_PATH/input + - ln -s $LOCAL_DATA_PATH/input input + - snakemake --cores 2 + +bench:ecal_gaps: + extends: .det_benchmark + stage: benchmarks + needs: + - ["sim:ecal_gaps"] + script: + - ln -s $LOCAL_DATA_PATH/input input + - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps + - pip install -r benchmarks/ecal_gaps/requirements.txt + - snakemake --cores 8 ecal_gaps + +collect_results:backgrounds: + extends: .det_benchmark + stage: collect + needs: + - "bench:ecal_gaps" + script: + - ls -lrht diff --git a/benchmarks/ecal_gaps/ecal_gaps.org b/benchmarks/ecal_gaps/ecal_gaps.org new file mode 100644 index 00000000..23bd4737 --- /dev/null +++ b/benchmarks/ecal_gaps/ecal_gaps.org @@ -0,0 +1,192 @@ +#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:gap :async yes :results drawer :exports both + +#+TITLE: ePIC ECal gap study +#+AUTHOR: Dmitry Kalinkin +#+OPTIONS: d:t + +#+begin_src jupyter-python :results silent +import os +from pathlib import Path + +import awkward as ak +import boost_histogram as bh +import dask +import dask_awkward as dak +import dask_histogram as dh +import numpy as np +import uproot +#+end_src + +#+begin_src jupyter-python :results slient +from dask.distributed import Client +client = Client(os.environ.get("DASK_SCHEDULER", "localhost:8786")) +#+end_src + +* Plotting setup + +#+begin_src jupyter-python :results silent +import matplotlib as mpl +import matplotlib.pyplot as plt + +def setup_presentation_style(): + mpl.rcParams.update(mpl.rcParamsDefault) + plt.style.use('ggplot') + plt.rcParams.update({ + 'axes.labelsize': 8, + 'axes.titlesize': 9, + 'figure.titlesize': 9, + 'figure.figsize': (4, 3), + 'legend.fontsize': 7, + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'pgf.rcfonts': False, + }) + +setup_presentation_style() +#+end_src + +** Settings + +#+begin_src jupyter-python :results silent +DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG") + +output_dir=Path(os.environ.get("OUTPUT_DIR", "./")) +output_dir.mkdir(parents=True, exist_ok=True) +#+end_src + +* Analysis + +#+begin_src jupyter-python :results silent +def filter_name(name): + return ( + "PARAMETERS" not in name + and all(coll not in name for coll in ["_intMap", "_floatMap", "_stringMap", "_doubleMap"]) + ) + +import functools + +@functools.cache +def get_events(particle="e-", energy="20GeV", num_files=1): + events = uproot.dask( + {} + | {f"sim_output/{DETECTOR_CONFIG}/{particle}/{energy}/3to50deg/{particle}_{energy}_3to50deg.{INDEX:04d}.eicrecon.tree.edm4eic.root": "events" for INDEX in range(num_files)} + | {f"sim_output/{DETECTOR_CONFIG}/{particle}/{energy}/45to135deg/{particle}_{energy}_45to135deg.{INDEX:04d}.eicrecon.tree.edm4eic.root": "events" for INDEX in range(num_files)} + | {f"sim_output/{DETECTOR_CONFIG}/{particle}/{energy}/130to177deg/{particle}_{energy}_130to177deg.{INDEX:04d}.eicrecon.tree.edm4eic.root": "events" for INDEX in range(num_files)} + , + filter_name=filter_name, open_files=False, steps_per_file=1, + ) + + pt = np.hypot(events["MCParticles.momentum.x"][:,0], events["MCParticles.momentum.y"][:,0]) + theta = np.arctan2(pt, events["MCParticles.momentum.z"][:,0]) + eta = -np.log(np.tan(theta / 2)) + p = np.hypot(pt, events["MCParticles.momentum.z"][:,0]) + + axis_eta = bh.axis.Regular(200, -4, 4) + + hist_norm = dh.factory( + eta, + axes=( + axis_eta, + ), + ).compute() + weight_lut = 1 / hist_norm.values(flow=True) + + def get_weight(array): + if ak.backend(array) == "typetracer": + ak.typetracer.touch_data(array) + return array + return ak.from_numpy(weight_lut[axis_eta.index(ak.to_numpy(array))]) + + weights = eta.map_partitions(get_weight) + + events["p_thrown"] = p + events["eta_thrown"] = eta + events["weights"] = weights + + return events, axis_eta +#+end_src + +#+begin_src jupyter-python +particle = "e-" + +for energy in ["500MeV", "5GeV", "20GeV"]: + events, axis_eta = get_events(particle=particle, energy=energy) + + for calo_name in ["EcalEndcapN", "EcalBarrelScFi", "EcalBarrelImaging", "EcalEndcapP"]: + cond = ak.sum(events[f"{calo_name}RecHits.energy"], axis=1) > 10e-3 # GeV + + hist = dh.factory( + events["eta_thrown"][cond], + (ak.sum(events[f"{calo_name}RecHits.energy"], axis=1) / events["p_thrown"])[cond], + axes=( + axis_eta, + bh.axis.Regular(100, 0.1, 1.1), + ), + weights=events["weights"][cond], + ).compute() + + cmap = plt.get_cmap(name="rainbow", lut=None).copy() + cmap.set_under("none") + + plt.pcolormesh( + hist.axes[0].edges, + hist.axes[1].edges, + hist.values().T, + cmap=cmap, + norm=mpl.colors.LogNorm( + vmin=np.min(hist.values()[hist.values() > 0]), + ), + ) + plt.colorbar() + plt.xlabel(r"$\eta_{thrown}$", loc="right") + plt.ylabel(r"$\sum E_{\mathrm{dep.}} / p_{\mathrm{thrown}}$", loc="top") + plt.title(f"{energy} {particle} in {calo_name}") + plt.minorticks_on() + plt.savefig(output_dir / f"ecal_gap_{particle}_{energy}_{calo_name}.png", bbox_inches="tight") + plt.show() + plt.clf() +#+end_src + +#+begin_src jupyter-python +particle = "e-" + +for energy in ["500MeV", "5GeV", "20GeV"]: + events, axis_eta = get_events(particle=particle, energy=energy) + + calos = ["EcalEndcapN", "EcalBarrelScFi", "EcalEndcapP"] + total_energy = sum([ + ak.sum(events[f"{calo_name}RecHits.energy"], axis=1) + for calo_name in calos + ]) + + hist = dh.factory( + events["eta_thrown"], + total_energy / events["p_thrown"], + axes=( + axis_eta, + bh.axis.Regular(100, 0.0, 1.1), + ), + weights=events["weights"], + ).compute() + + cmap = plt.get_cmap(name="rainbow", lut=None).copy() + cmap.set_under("none") + + plt.pcolormesh( + hist.axes[0].edges, + hist.axes[1].edges, + hist.values().T, + cmap=cmap, + norm=mpl.colors.LogNorm( + vmin=np.min(hist.values()[hist.values() > 0]), + ), + ) + plt.colorbar() + plt.xlabel(r"$\eta_{thrown}$", loc="right") + plt.ylabel(r"$\sum E_{\mathrm{dep.}} / p_{\mathrm{thrown}}$", loc="top") + plt.title(f"{energy} {particle}\n" + "+".join(calos)) + plt.minorticks_on() + plt.savefig(output_dir / f"ecal_gap_{particle}_{energy}_sum_all.png", bbox_inches="tight") + plt.show() + plt.clf() +#+end_src diff --git a/benchmarks/ecal_gaps/org2py.awk b/benchmarks/ecal_gaps/org2py.awk new file mode 100644 index 00000000..0d23cf25 --- /dev/null +++ b/benchmarks/ecal_gaps/org2py.awk @@ -0,0 +1,22 @@ +BEGIN { + in_src = 0 + IGNORECASE=1 +} + +/^#\+begin_src\s+[^\s]*python/ { + in_src = 1 + match($0, /^ */) + spaces = RLENGTH + next +} + +/^#\+end_src/ { + in_src = 0 + next +} + +in_src { + re = "^ {" spaces "}" + gsub(re,"") + print +} diff --git a/benchmarks/ecal_gaps/requirements.txt b/benchmarks/ecal_gaps/requirements.txt new file mode 100644 index 00000000..738152dd --- /dev/null +++ b/benchmarks/ecal_gaps/requirements.txt @@ -0,0 +1,7 @@ +awkward >= 2.4.0 +dask >= 2023 +dask_awkward +dask_histogram +distributed >= 2023 +pyhepmc +uproot >= 5.2.0