diff --git a/benchmarks/backwards_ecal/Snakefile b/benchmarks/backwards_ecal/Snakefile index 9b0b1559..5efaf844 100644 --- a/benchmarks/backwards_ecal/Snakefile +++ b/benchmarks/backwards_ecal/Snakefile @@ -64,14 +64,69 @@ exec env DETECTOR_CONFIG={params.DETECTOR_CONFIG} \ """ -rule backwards_ecal_recon_many: +rule backwards_ecal_local_sim_list: input: expand( "sim_output/backwards_ecal/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{{ENERGY}}/{{PHASE_SPACE}}/{{PARTICLE}}_{{ENERGY}}_{{PHASE_SPACE}}.{INDEX:04d}.eicrecon.tree.edm4eic.root", INDEX=range(20), ), output: - touch("sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/flag"), + "listing/backwards_ecal/local/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst", + run: + with open(output[0], "wt") as fp: + fp.write("\n".join(input)) + + +if config.get("stream_from_xrootd", True) not in [False, "", "0", "false"]: + rule backwards_ecal_campaign_sim_list: + output: + "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst", + params: + search_path=lambda wildcards: f"EPIC/RECO/{wildcards.CAMPAIGN}/epic_craterlake/SINGLE/{wildcards.PARTICLE}/{wildcards.ENERGY}/{wildcards.PHASE_SPACE}/", + shell: """ + xrdfs root://dtn-eic.jlab.org/ ls /work/eic2/{params.search_path} \ + | awk '{{ print "root://dtn-eic.jlab.org/"$1; }}' \ + | sort \ + > {output} + if [ ! -s {output} ]; then + echo "Got an empty file listing for path \"\"" + exit 1 + fi + """ +else: + checkpoint backwards_ecal_campaign_sim_list_checkpoint: + output: + "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst.orig", + params: + search_path=lambda wildcards: f"EPIC/RECO/{wildcards.CAMPAIGN}/epic_craterlake/SINGLE/{wildcards.PARTICLE}/{wildcards.ENERGY}/{wildcards.PHASE_SPACE}/", + shell: """ + xrdfs root://dtn-eic.jlab.org/ ls /work/eic2/{params.search_path} \ + | sed -e 's#^/work/eic2/##' \ + | sort \ + > {output} + if [ ! -s {output} ]; then + echo "Got an empty file listing for path \"\"" + exit 1 + fi + """ + + def get_backwards_ecal_campaign_sim_list(wildcards): + with checkpoints.backwards_ecal_campaign_sim_list_checkpoint.get(**wildcards).output[0].open() as fp: + return [line.rstrip() for line in fp.readlines()] + + rule backwards_ecal_campaign_sim_list: + input: + # depend on paths from the file list + get_backwards_ecal_campaign_sim_list, + orig_list="listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst.orig", + output: + "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst", + shell: """ + cp {input.orig_list} {output} + """ + + +ruleorder: backwards_ecal_local_sim_list > backwards_ecal_campaign_sim_list DETECTOR_CONFIG=os.environ["DETECTOR_CONFIG"] @@ -79,7 +134,7 @@ DETECTOR_CONFIG=os.environ["DETECTOR_CONFIG"] rule backwards_ecal: input: expand( - "sim_output/backwards_ecal/" + DETECTOR_CONFIG + "/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/flag", + "listing/backwards_ecal/{{CAMPAIGN}}/" + DETECTOR_CONFIG + "/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst", PARTICLE=["pi-", "e-"], ENERGY=[ "100MeV", @@ -96,14 +151,18 @@ rule backwards_ecal: matplotlibrc=".matplotlibrc", script="benchmarks/backwards_ecal/backwards_ecal.py", output: - directory("results/backwards_ecal") + directory("results/backwards_ecal/{CAMPAIGN}/") shell: """ +if [[ "{wildcards.CAMPAIGN}" == "local" ]]; then +export PLOT_TITLE="Benchmark simulation" +else +export PLOT_TITLE="\\textbf{{ePIC}} Simulation {wildcards.CAMPAIGN}" +fi env \ MATPLOTLIBRC={input.matplotlibrc} \ DETECTOR_CONFIG=""" + DETECTOR_CONFIG + """ \ -PLOT_TITLE=""" + DETECTOR_CONFIG + """ \ -INPUT_PATH_FORMAT=sim_output/backwards_ecal/""" + DETECTOR_CONFIG + """/{{particle}}/{{energy}}/130to177deg/{{particle}}_{{energy}}_130to177deg.{{ix:04d}}.eicrecon.tree.edm4eic.root \ +INPUT_PATH_FORMAT=listing/backwards_ecal/{wildcards.CAMPAIGN}/""" + DETECTOR_CONFIG + """/{{particle}}/{{energy}}/130to177deg.lst \ OUTPUT_DIR={output} \ python {input.script} """ diff --git a/benchmarks/backwards_ecal/backwards_ecal.org b/benchmarks/backwards_ecal/backwards_ecal.org index 27f1094c..6ca1e916 100644 --- a/benchmarks/backwards_ecal/backwards_ecal.org +++ b/benchmarks/backwards_ecal/backwards_ecal.org @@ -57,7 +57,6 @@ setup_presentation_style() DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG") PLOT_TITLE=os.environ.get("PLOT_TITLE") INPUT_PATH_FORMAT=os.environ.get("INPUT_PATH_FORMAT", "EPIC/RECO/24.04.0/epic_craterlake/SINGLE/{particle}/{energy}/130to177deg/{particle}_{energy}_130to177deg.{ix:04d}.eicrecon.tree.edm4eic.root") -INPUT_PATH_INDEX_RANGE=list(range(20)) output_dir=Path(os.environ.get("OUTPUT_DIR", "./")) output_dir.mkdir(parents=True, exist_ok=True) @@ -92,23 +91,26 @@ energies = [ "10GeV", "20GeV", ] - filter_name = [ "MCParticles.*", - "*EcalEndcapNRecHits*", "*EcalEndcapNClusters*", ] pi_eval = {} e_eval = {} +def readlist(path): + with open(path, "rt") as fp: + paths = [line.rstrip() for line in fp.readlines()] + return paths + for energy in energies: pi_eval[energy] = filter_pointing(uproot.concatenate( - {INPUT_PATH_FORMAT.format(particle="pi-", energy=energy, ix=ix): "events" for ix in INPUT_PATH_INDEX_RANGE}, + {path: "events" for path in readlist(INPUT_PATH_FORMAT.format(particle="pi-", energy=energy))}, filter_name=filter_name, )) e_eval[energy] = filter_pointing(uproot.concatenate( - {INPUT_PATH_FORMAT.format(particle="e-", energy=energy, ix=ix): "events" for ix in INPUT_PATH_INDEX_RANGE}, + {path: "events" for path in readlist(INPUT_PATH_FORMAT.format(particle="e-", energy=energy))}, filter_name=filter_name, )) #+end_src @@ -118,69 +120,59 @@ for energy in energies: #+begin_src jupyter-python fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6)) -fig.suptitle(PLOT_TITLE) - axs = np.ravel(np.array(axs)) sigmas_rel_FWHM_cb = {} fractions_below = {} for ix, energy in enumerate(energies): - for use_clusters in [False, True]: - energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3")) - if use_clusters: - clf_label = "leading cluster" - else: - clf_label = "sum all hits" - def clf(events): - if use_clusters: - return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value - else: - return ak.sum(events["EcalEndcapNRecHits.energy"], axis=-1) / energy_value - e_pred = clf(e_eval[energy]) - - plt.sca(axs[ix]) - counts, bins, patches = plt.hist(e_pred, weights=np.full_like(e_pred, 1.0 / ak.num(e_pred, axis=0)), bins=np.linspace(0.01, 1.01, 101), label=rf"$e^-$ {clf_label}", hatch=None if use_clusters else r"xxx", alpha=0.8 if use_clusters else 1.) - plt.title(f"{energy}") + energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3")) + clf_label = PLOT_TITLE + def clf(events): + return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value + e_pred = clf(e_eval[energy]) + + plt.sca(axs[ix]) + counts, bins, patches = plt.hist(e_pred, weights=np.full_like(e_pred, 1.0 / ak.num(e_pred, axis=0)), bins=np.linspace(0.01, 1.01, 101), label=rf"$e^-$ {clf_label}") + plt.title(f"{energy}") + + e_over_p = (bins[1:] + bins[:-1]) / 2 + import scipy.stats + def f(x, n, beta, m, loc, scale): + return n * scipy.stats.crystalball.pdf(x, beta, m, loc, scale) + p0 = (np.sum(counts[10:]), 2., 3., 0.95, 0.05) + + try: + import scipy.optimize + par, pcov = scipy.optimize.curve_fit(f, e_over_p[5:], counts[5:], p0=p0, maxfev=10000) + except RuntimeError: + par = None + plt.plot(e_over_p, f(e_over_p, *par), label=rf"Crystal Ball fit", color="tab:green", lw=0.8) - e_over_p = (bins[1:] + bins[:-1]) / 2 - import scipy.stats - def f(x, n, beta, m, loc, scale): - return n * scipy.stats.crystalball.pdf(x, beta, m, loc, scale) - p0 = (np.sum(counts[10:]), 2., 3., 0.95, 0.05) - - try: - import scipy.optimize - par, pcov = scipy.optimize.curve_fit(f, e_over_p[5:], counts[5:], p0=p0, maxfev=10000) - except RuntimeError: - par = None - plt.plot(e_over_p, f(e_over_p, *par), label=rf"Crystal Ball fit", color="tab:green" if use_clusters else "green", lw=0.8) - - def summarize_fit(par): - _, _, _, loc_cb, scale_cb = par - # Calculate FWHM - y_max = np.max(f(np.linspace(0., 1., 100), *par)) - f_prime = lambda x: f(x, *par) - y_max / 2 - x_plus, = scipy.optimize.root(f_prime, loc_cb + scale_cb).x - x_minus, = scipy.optimize.root(f_prime, loc_cb - scale_cb).x - color = "cyan" if use_clusters else "orange" - plt.axvline(x_minus, ls="--", lw=0.75, color=patches[0].get_facecolor(), label=r"$\mu - $FWHM") - plt.axvline(x_plus, ls=":", lw=0.75, color=patches[0].get_facecolor(), label=r"$\mu + $FWHM") - fwhm = (x_plus - x_minus) / loc_cb - sigma_rel_FWHM_cb = fwhm / 2 / np.sqrt(2 * np.log(2)) - - cutoff_x = loc_cb - fwhm - fraction_below = np.sum(counts[e_over_p < cutoff_x]) / ak.num(e_pred, axis=0) - - return sigma_rel_FWHM_cb, fraction_below - - sigma_rel_FWHM_cb, fraction_below = summarize_fit(par) - sigmas_rel_FWHM_cb.setdefault(clf_label, {})[energy] = sigma_rel_FWHM_cb - fractions_below.setdefault(clf_label, {})[energy] = fraction_below + def summarize_fit(par): + _, _, _, loc_cb, scale_cb = par + # Calculate FWHM + y_max = np.max(f(np.linspace(0., 1., 100), *par)) + f_prime = lambda x: f(x, *par) - y_max / 2 + x_plus, = scipy.optimize.root(f_prime, loc_cb + scale_cb).x + x_minus, = scipy.optimize.root(f_prime, loc_cb - scale_cb).x + plt.axvline(x_minus, ls="--", lw=0.75, color=patches[0].get_facecolor(), label=r"$\mu - $FWHM") + plt.axvline(x_plus, ls=":", lw=0.75, color=patches[0].get_facecolor(), label=r"$\mu + $FWHM") + fwhm = (x_plus - x_minus) / loc_cb + sigma_rel_FWHM_cb = fwhm / 2 / np.sqrt(2 * np.log(2)) - plt.legend() - plt.xlabel("$E/p$", loc="right") - plt.ylabel("Event yield", loc="top") + cutoff_x = loc_cb - fwhm + fraction_below = np.sum(counts[e_over_p < cutoff_x]) / ak.num(e_pred, axis=0) + + return sigma_rel_FWHM_cb, fraction_below + + sigma_rel_FWHM_cb, fraction_below = summarize_fit(par) + sigmas_rel_FWHM_cb.setdefault(clf_label, {})[energy] = sigma_rel_FWHM_cb + fractions_below.setdefault(clf_label, {})[energy] = fraction_below + + plt.legend() + plt.xlabel("$E/p$", loc="right") + plt.ylabel("Event yield", loc="top") fig.savefig(output_dir / f"resolution_plots.pdf", bbox_inches="tight") fig.savefig(output_dir / f"resolution_plots.png", bbox_inches="tight") @@ -207,22 +199,30 @@ for clf_label, sigma_rel_FWHM_cb in sigmas_rel_FWHM_cb.items(): energy_values, sigma_over_e, marker=".", + ls="none", label=f"{clf_label}" ) + xmin = np.min(energy_values[cond]) + xmax = np.max(energy_values[cond]) + xs = np.arange(xmin, xmax, 0.1) plt.plot( - energy_values[cond], - f(energy_values[cond], *par), - color="black", + xs, + f(xs, *par), ls="--", lw=0.5, - label=f"{clf_label}, ${np.ceil(stochastic * 10) / 10:.1f}\% / \sqrt{{E}} \oplus {np.ceil(constant * 10) / 10:.1f}\%$", + label=f"Functional fit: ${np.ceil(stochastic * 10) / 10:.1f}\% / \sqrt{{E}} \oplus {np.ceil(constant * 10) / 10:.1f}\%$", ) -plt.plot( - energy_values, - np.sqrt((1 / energy_values) ** 2 + (1 / np.sqrt(energy_values)) ** 2 + 1 ** 2), - color="black", label=r"YR requirement $1\% / E \oplus 2.5\% / \sqrt{E} \oplus 1\%$", +xmin = np.min(energy_values) +xmax = np.max(energy_values) * 1.05 +xs = np.arange(xmin, xmax + 0.1, 0.1) +plt.fill_between( + xs, + np.sqrt((2 / np.sqrt(xs)) ** 2 + 1 ** 2), + np.sqrt((2 / np.sqrt(xs)) ** 2 + 3 ** 2), + lw=0., alpha=0.2, color="black", label=r"YR requirement $2\% / \sqrt{E} \oplus (1-3)\%$", ) -plt.title(INPUT_PATH_FORMAT) +plt.xlim(0., xmax) +plt.ylim(top=6.) plt.legend() plt.xlabel("Energy, GeV", loc="right") plt.ylabel(r"$\sigma_{E} / E$ derived from FWHM, %", loc="top") @@ -238,10 +238,6 @@ fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6)) fig_log, axs_log = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6)) fig_roc, axs_roc = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6)) -fig.suptitle(PLOT_TITLE) -fig_log.suptitle(PLOT_TITLE) -fig_roc.suptitle(PLOT_TITLE) - axs = np.ravel(np.array(axs)) axs_log = np.ravel(np.array(axs_log)) axs_roc = np.ravel(np.array(axs_roc)) @@ -249,49 +245,42 @@ axs_roc = np.ravel(np.array(axs_roc)) rocs = {} for ix, energy in enumerate(energies): - for use_clusters in [False, True]: - energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3")) - if use_clusters: - clf_label = "leading cluster" - else: - clf_label = "sum all hits" - def clf(events): - if use_clusters: - return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value - else: - return ak.sum(events["EcalEndcapNRecHits.energy"], axis=-1) / energy_value - e_pred = clf(e_eval[energy]) - pi_pred = clf(pi_eval[energy]) - - for do_log, ax in [(False, axs[ix]), (True, axs_log[ix])]: - plt.sca(ax) - plt.hist(e_pred, weights=np.full_like(e_pred, 1.0 / ak.num(e_pred, axis=0)), bins=np.linspace(0., 1.01, 101), label=rf"$e^-$ {clf_label}", hatch=None if use_clusters else r"xxx", alpha=0.8 if use_clusters else 1.) - plt.hist(pi_pred, weights=np.full_like(pi_pred, 1.0 / ak.num(pi_pred, axis=0)), bins=np.linspace(0., 1.01, 101), label=rf"$\pi^-$ {clf_label}", histtype="step") - plt.title(f"{energy}") - plt.legend() - plt.xlabel("Classifier output") - plt.ylabel("Event yield") - if do_log: - plt.yscale("log") - - plt.sca(axs_roc[ix]) - fpr, tpr, _ = roc_curve( - np.concatenate([np.ones_like(e_pred), np.zeros_like(pi_pred)]), - np.concatenate([e_pred, pi_pred]), - ) - cond = fpr != 0 # avoid infinite rejection (region of large uncertainty) - cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty) - def mk_interp(tpr, fpr): - def interp(eff): - return np.interp(eff, tpr, fpr) - return interp - rocs.setdefault(clf_label, {})[energy] = mk_interp(tpr, fpr) - plt.plot(tpr[cond] * 100, 1 / fpr[cond], label=f"{clf_label}") - plt.yscale("log") + energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3")) + clf_label = PLOT_TITLE + def clf(events): + return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value + e_pred = clf(e_eval[energy]) + pi_pred = clf(pi_eval[energy]) + + for do_log, ax in [(False, axs[ix]), (True, axs_log[ix])]: + plt.sca(ax) + plt.hist(e_pred, weights=np.full_like(e_pred, 1.0 / ak.num(e_pred, axis=0)), bins=np.linspace(0., 1.01, 101), label=rf"$e^-$ {clf_label}") + plt.hist(pi_pred, weights=np.full_like(pi_pred, 1.0 / ak.num(pi_pred, axis=0)), bins=np.linspace(0., 1.01, 101), label=rf"$\pi^-$ {clf_label}", histtype="step") plt.title(f"{energy}") - plt.legend(loc="lower left") - plt.xlabel("Electron efficiency, %") - plt.ylabel("Pion rejection factor") + plt.legend() + plt.xlabel("Classifier output") + plt.ylabel("Event yield") + if do_log: + plt.yscale("log") + + plt.sca(axs_roc[ix]) + fpr, tpr, _ = roc_curve( + np.concatenate([np.ones_like(e_pred), np.zeros_like(pi_pred)]), + np.concatenate([e_pred, pi_pred]), + ) + cond = fpr != 0 # avoid infinite rejection (region of large uncertainty) + cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty) + def mk_interp(tpr, fpr): + def interp(eff): + return np.interp(eff, tpr, fpr) + return interp + rocs.setdefault(clf_label, {})[energy] = mk_interp(tpr, fpr) + plt.plot(tpr[cond] * 100, 1 / fpr[cond], label=f"{clf_label}") + plt.yscale("log") + plt.title(f"{energy}") + plt.legend(loc="lower left") + plt.xlabel("Electron efficiency, %") + plt.ylabel("Pion rejection factor") fig.savefig(output_dir / f"pred.pdf", bbox_inches="tight") fig.savefig(output_dir / f"pred.png", bbox_inches="tight") @@ -311,8 +300,9 @@ for clf_label, roc in rocs.items(): marker=".", label=f"{clf_label}", ) +xmax = np.max(energy_values) * 1.05 +plt.xlim(0., xmax) plt.yscale("log") -plt.title(INPUT_PATH_FORMAT) plt.legend() plt.xlabel("Energy, GeV") plt.ylabel("Pion rejection at 95%") diff --git a/benchmarks/backwards_ecal/config.yml b/benchmarks/backwards_ecal/config.yml index 7048f8e7..68a183e5 100644 --- a/benchmarks/backwards_ecal/config.yml +++ b/benchmarks/backwards_ecal/config.yml @@ -16,7 +16,7 @@ sim:backwards_ecal: ] script: - | - snakemake $SNAKEMAKE_FLAGS --cores 5 sim_output/backwards_ecal/${DETECTOR_CONFIG}/${PARTICLE}/${MOMENTUM}/130to177deg/flag + snakemake $SNAKEMAKE_FLAGS --cores 5 listing/backwards_ecal/local/${DETECTOR_CONFIG}/${PARTICLE}/${MOMENTUM}/130to177deg.lst bench:backwards_ecal: extends: .det_benchmark @@ -26,7 +26,17 @@ bench:backwards_ecal: script: - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps - pip install -r benchmarks/backwards_ecal/requirements.txt - - snakemake $SNAKEMAKE_FLAGS --cores 1 backwards_ecal + - snakemake $SNAKEMAKE_FLAGS --cores 1 results/backwards_ecal/local + +bench:backwards_ecal_campaigns: + extends: .det_benchmark + stage: benchmarks + when: manual + timeout: 4 hours + script: + - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps + - pip install -r benchmarks/backwards_ecal/requirements.txt + - snakemake $SNAKEMAKE_FLAGS --cores 1 results/backwards_ecal/24.10.1 collect_results:backwards_ecal: extends: .det_benchmark @@ -36,5 +46,5 @@ collect_results:backwards_ecal: script: - ls -lrht - mv results{,_save}/ # move results directory out of the way to preserve it - - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output backwards_ecal + - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/backwards_ecal/local - mv results{_save,}/