From 0daab46e3f6fb577443687fb610f3eca0446e045 Mon Sep 17 00:00:00 2001
From: Dmitry Kalinkin <dmitry.kalinkin@gmail.com>
Date: Mon, 26 Feb 2024 16:09:58 -0500
Subject: [PATCH] add benchmarks/ecal_gaps

---
 .gitlab-ci.yml                        |   1 +
 Snakefile                             |   1 +
 benchmarks/ecal_gaps/Snakefile        |  94 +++++++++++++
 benchmarks/ecal_gaps/config.yml       |  26 ++++
 benchmarks/ecal_gaps/ecal_gaps.org    | 192 ++++++++++++++++++++++++++
 benchmarks/ecal_gaps/org2py.awk       |  22 +++
 benchmarks/ecal_gaps/requirements.txt |   7 +
 7 files changed, 343 insertions(+)
 create mode 100644 benchmarks/ecal_gaps/Snakefile
 create mode 100644 benchmarks/ecal_gaps/config.yml
 create mode 100644 benchmarks/ecal_gaps/ecal_gaps.org
 create mode 100644 benchmarks/ecal_gaps/org2py.awk
 create mode 100644 benchmarks/ecal_gaps/requirements.txt

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index dae3f2f8..0e150857 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -129,6 +129,7 @@ get_data:
 
 include: 
 #  - local: 'benchmarks/backgrounds/config.yml'
+  - local: 'benchmarks/ecal_gaps/config.yml'
   - local: 'benchmarks/tracking_detectors/config.yml'
   - local: 'benchmarks/barrel_ecal/config.yml'
   - local: 'benchmarks/barrel_hcal/config.yml'
diff --git a/Snakefile b/Snakefile
index 6e4bc637..dcec820c 100644
--- a/Snakefile
+++ b/Snakefile
@@ -19,6 +19,7 @@ else:
 
 include: "benchmarks/backgrounds/Snakefile"
 include: "benchmarks/barrel_ecal/Snakefile"
+include: "benchmarks/ecal_gaps/Snakefile"
 
 rule matplotlibrc:
     output:
diff --git a/benchmarks/ecal_gaps/Snakefile b/benchmarks/ecal_gaps/Snakefile
new file mode 100644
index 00000000..ed530469
--- /dev/null
+++ b/benchmarks/ecal_gaps/Snakefile
@@ -0,0 +1,94 @@
+import os
+
+
+rule ecal_gaps_sim:
+    input:
+        steering_file=provider.remote(remote_path("EPIC/EVGEN/SINGLE/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.steer")),
+    output:
+        "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
+    log:
+        "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root.log",
+    wildcard_constraints:
+        PARTICLE="e-",
+        ENERGY="(500MeV|5GeV|20GeV)",
+        PHASE_SPACE="(3to50|45to135|130to177)deg",
+        INDEX="\d{4}",
+    params:
+        N_EVENTS=1000
+    shell:
+        """
+ddsim \
+  --runType batch \
+  --enableGun \
+  --steeringFile "{input.steering_file}" \
+  --random.seed 1{wildcards.INDEX} \
+  --filter.tracker edep0 \
+  -v WARNING \
+  --numberOfEvents {params.N_EVENTS} \
+  --compactFile $DETECTOR_PATH/{wildcards.DETECTOR_CONFIG}.xml \
+  --outputFile {output}
+"""
+
+
+rule ecal_gaps_recon:
+    input:
+        "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
+    output:
+        "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root",
+    log:
+        "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log",
+    wildcard_constraints:
+        INDEX="\d{4}",
+    shell: """
+eicrecon {input} -Ppodio:output_file={output}
+"""
+
+
+rule ecal_gaps_org2py:
+    input:
+        notebook=workflow.source_path("ecal_gaps.org"),
+        converter=workflow.source_path("./org2py.awk"),
+    output:
+        "ecal_gaps.py"
+    shell:
+        """
+awk -f {input.converter} {input.notebook} > {output}
+"""
+
+DETECTOR_CONFIG=os.environ["DETECTOR_CONFIG"]
+
+rule ecal_gaps:
+    input:
+        matplotlibrc=".matplotlibrc",
+        script="ecal_gaps.py",
+        # TODO pass as a file list?
+        _=expand(
+            "sim_output/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
+            DETECTOR_CONFIG=DETECTOR_CONFIG,
+            PARTICLE=["e-"],
+            ENERGY=["500MeV", "5GeV", "20GeV"],
+            PHASE_SPACE=["3to50deg", "45to135deg", "130to177deg"],
+            INDEX=range(1),
+        ),
+    output:
+        directory("results/ecal_gaps"),
+    threads: workflow.cores
+    shell:
+        """
+cleanup() {{
+  echo Cleaning up
+  kill $WORKER_PID $SCHEDULER_PID
+}}
+trap cleanup EXIT
+
+PORT=$RANDOM
+dask scheduler --port $PORT &
+export DASK_SCHEDULER=localhost:$PORT
+SCHEDULER_PID=$!
+dask worker tcp://$DASK_SCHEDULER --nworkers {threads} --nthreads 1 &
+WORKER_PID=$!
+env \
+MATPLOTLIBRC={input.matplotlibrc} \
+OUTPUT_DIR={output} \
+python {input.script}
+"""
diff --git a/benchmarks/ecal_gaps/config.yml b/benchmarks/ecal_gaps/config.yml
new file mode 100644
index 00000000..14e7ee35
--- /dev/null
+++ b/benchmarks/ecal_gaps/config.yml
@@ -0,0 +1,26 @@
+sim:ecal_gaps:
+  extends: .det_benchmark
+  stage: simulate
+  script:
+    - mkdir $LOCAL_DATA_PATH/input
+    - ln -s $LOCAL_DATA_PATH/input input
+    - snakemake --cores 2 
+
+bench:ecal_gaps:
+  extends: .det_benchmark
+  stage: benchmarks
+  needs:
+    - ["sim:ecal_gaps"]
+  script:
+    - ln -s $LOCAL_DATA_PATH/input input
+    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
+    - pip install -r benchmarks/ecal_gaps/requirements.txt
+    - snakemake --cores 8 ecal_gaps
+
+collect_results:backgrounds:
+  extends: .det_benchmark
+  stage: collect
+  needs:
+    - "bench:ecal_gaps"
+  script:
+    - ls -lrht
diff --git a/benchmarks/ecal_gaps/ecal_gaps.org b/benchmarks/ecal_gaps/ecal_gaps.org
new file mode 100644
index 00000000..23bd4737
--- /dev/null
+++ b/benchmarks/ecal_gaps/ecal_gaps.org
@@ -0,0 +1,192 @@
+#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:gap :async yes :results drawer :exports both
+
+#+TITLE: ePIC ECal gap study
+#+AUTHOR: Dmitry Kalinkin
+#+OPTIONS: d:t
+
+#+begin_src jupyter-python :results silent
+import os
+from pathlib import Path
+
+import awkward as ak
+import boost_histogram as bh
+import dask
+import dask_awkward as dak
+import dask_histogram as dh
+import numpy as np
+import uproot
+#+end_src   
+
+#+begin_src jupyter-python :results slient
+from dask.distributed import Client
+client = Client(os.environ.get("DASK_SCHEDULER", "localhost:8786"))
+#+end_src
+
+* Plotting setup
+
+#+begin_src jupyter-python :results silent
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+       
+def setup_presentation_style():
+    mpl.rcParams.update(mpl.rcParamsDefault)
+    plt.style.use('ggplot')
+    plt.rcParams.update({
+        'axes.labelsize': 8,
+        'axes.titlesize': 9,
+        'figure.titlesize': 9,
+        'figure.figsize': (4, 3),
+        'legend.fontsize': 7,
+        'xtick.labelsize': 8,
+        'ytick.labelsize': 8,
+        'pgf.rcfonts': False,
+    })
+
+setup_presentation_style()
+#+end_src       
+
+** Settings
+
+#+begin_src jupyter-python :results silent
+DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG")
+
+output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
+output_dir.mkdir(parents=True, exist_ok=True)
+#+end_src
+
+* Analysis
+
+#+begin_src jupyter-python :results silent
+def filter_name(name):
+    return (
+        "PARAMETERS" not in name
+        and all(coll not in name for coll in ["_intMap", "_floatMap", "_stringMap", "_doubleMap"])
+    )
+
+import functools
+
+@functools.cache
+def get_events(particle="e-", energy="20GeV", num_files=1):
+    events = uproot.dask(
+        {}
+        | {f"sim_output/{DETECTOR_CONFIG}/{particle}/{energy}/3to50deg/{particle}_{energy}_3to50deg.{INDEX:04d}.eicrecon.tree.edm4eic.root": "events" for INDEX in range(num_files)}
+        | {f"sim_output/{DETECTOR_CONFIG}/{particle}/{energy}/45to135deg/{particle}_{energy}_45to135deg.{INDEX:04d}.eicrecon.tree.edm4eic.root": "events" for INDEX in range(num_files)}
+        | {f"sim_output/{DETECTOR_CONFIG}/{particle}/{energy}/130to177deg/{particle}_{energy}_130to177deg.{INDEX:04d}.eicrecon.tree.edm4eic.root": "events" for INDEX in range(num_files)}
+        ,
+        filter_name=filter_name, open_files=False, steps_per_file=1,
+    )
+
+    pt = np.hypot(events["MCParticles.momentum.x"][:,0], events["MCParticles.momentum.y"][:,0])
+    theta = np.arctan2(pt, events["MCParticles.momentum.z"][:,0])
+    eta = -np.log(np.tan(theta / 2))
+    p = np.hypot(pt, events["MCParticles.momentum.z"][:,0])
+
+    axis_eta = bh.axis.Regular(200, -4, 4)
+
+    hist_norm = dh.factory(
+        eta,
+        axes=(
+            axis_eta,
+        ),
+    ).compute()
+    weight_lut = 1 / hist_norm.values(flow=True)
+
+    def get_weight(array):
+        if ak.backend(array) == "typetracer":
+            ak.typetracer.touch_data(array)
+            return array
+        return ak.from_numpy(weight_lut[axis_eta.index(ak.to_numpy(array))])
+
+    weights = eta.map_partitions(get_weight)
+
+    events["p_thrown"] = p
+    events["eta_thrown"] = eta
+    events["weights"] = weights
+
+    return events, axis_eta
+#+end_src
+
+#+begin_src jupyter-python
+particle = "e-"
+
+for energy in ["500MeV", "5GeV", "20GeV"]:
+    events, axis_eta = get_events(particle=particle, energy=energy)
+
+    for calo_name in ["EcalEndcapN", "EcalBarrelScFi", "EcalBarrelImaging", "EcalEndcapP"]:
+        cond = ak.sum(events[f"{calo_name}RecHits.energy"], axis=1) > 10e-3 # GeV
+
+        hist = dh.factory(
+            events["eta_thrown"][cond],
+            (ak.sum(events[f"{calo_name}RecHits.energy"], axis=1) / events["p_thrown"])[cond],
+            axes=(
+                axis_eta,
+                bh.axis.Regular(100, 0.1, 1.1),
+            ),
+            weights=events["weights"][cond],
+        ).compute()
+
+        cmap = plt.get_cmap(name="rainbow", lut=None).copy()
+        cmap.set_under("none")
+
+        plt.pcolormesh(
+            hist.axes[0].edges,
+            hist.axes[1].edges,
+            hist.values().T,
+            cmap=cmap,
+            norm=mpl.colors.LogNorm(
+                vmin=np.min(hist.values()[hist.values() > 0]),
+            ),
+        )
+        plt.colorbar()
+        plt.xlabel(r"$\eta_{thrown}$", loc="right")
+        plt.ylabel(r"$\sum E_{\mathrm{dep.}} / p_{\mathrm{thrown}}$", loc="top")
+        plt.title(f"{energy} {particle} in {calo_name}")
+        plt.minorticks_on()
+        plt.savefig(output_dir / f"ecal_gap_{particle}_{energy}_{calo_name}.png", bbox_inches="tight")
+        plt.show()
+        plt.clf()
+#+end_src
+
+#+begin_src jupyter-python
+particle = "e-"
+
+for energy in ["500MeV", "5GeV", "20GeV"]:
+    events, axis_eta = get_events(particle=particle, energy=energy)
+
+    calos = ["EcalEndcapN", "EcalBarrelScFi", "EcalEndcapP"]
+    total_energy = sum([
+        ak.sum(events[f"{calo_name}RecHits.energy"], axis=1)
+        for calo_name in calos
+    ])
+
+    hist = dh.factory(
+        events["eta_thrown"],
+        total_energy / events["p_thrown"],
+        axes=(
+            axis_eta,
+            bh.axis.Regular(100, 0.0, 1.1),
+        ),
+        weights=events["weights"],
+    ).compute()
+
+    cmap = plt.get_cmap(name="rainbow", lut=None).copy()
+    cmap.set_under("none")
+
+    plt.pcolormesh(
+        hist.axes[0].edges,
+        hist.axes[1].edges,
+        hist.values().T,
+        cmap=cmap,
+        norm=mpl.colors.LogNorm(
+            vmin=np.min(hist.values()[hist.values() > 0]),
+        ),
+    )
+    plt.colorbar()
+    plt.xlabel(r"$\eta_{thrown}$", loc="right")
+    plt.ylabel(r"$\sum E_{\mathrm{dep.}} / p_{\mathrm{thrown}}$", loc="top")
+    plt.title(f"{energy} {particle}\n" + "+".join(calos))
+    plt.minorticks_on()
+    plt.savefig(output_dir / f"ecal_gap_{particle}_{energy}_sum_all.png", bbox_inches="tight")
+    plt.show()
+    plt.clf()
+#+end_src
diff --git a/benchmarks/ecal_gaps/org2py.awk b/benchmarks/ecal_gaps/org2py.awk
new file mode 100644
index 00000000..0d23cf25
--- /dev/null
+++ b/benchmarks/ecal_gaps/org2py.awk
@@ -0,0 +1,22 @@
+BEGIN {
+	in_src = 0
+	IGNORECASE=1
+}
+
+/^#\+begin_src\s+[^\s]*python/ {
+	in_src = 1
+	match($0, /^ */)
+	spaces = RLENGTH
+	next
+}
+
+/^#\+end_src/ {
+	in_src = 0
+	next
+}
+
+in_src {
+	re = "^ {" spaces "}"
+	gsub(re,"")
+	print
+}
diff --git a/benchmarks/ecal_gaps/requirements.txt b/benchmarks/ecal_gaps/requirements.txt
new file mode 100644
index 00000000..738152dd
--- /dev/null
+++ b/benchmarks/ecal_gaps/requirements.txt
@@ -0,0 +1,7 @@
+awkward >= 2.4.0
+dask >= 2023
+dask_awkward
+dask_histogram
+distributed >= 2023
+pyhepmc
+uproot >= 5.2.0