From a3048cbb8a602ea3c4c63dd4d419dac969941d13 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@lairen.eu>
Date: Thu, 21 Mar 2024 22:30:42 +0100
Subject: [PATCH] add --dry

---
 src/pinefarm/cli/autogen.py                   |  8 ++--
 src/pinefarm/cli/install.py                   |  1 +
 src/pinefarm/cli/run.py                       | 35 ++++++++------
 src/pinefarm/external/__init__.py             |  5 +-
 src/pinefarm/external/interface.py            |  7 +--
 .../external/nnlojet/nnpdf_interface.py       |  8 ++--
 src/pinefarm/external/nnlojet/runcardgen.py   |  9 +++-
 src/pinefarm/external/nnlojet/runner.py       | 46 ++++++++-----------
 8 files changed, 65 insertions(+), 54 deletions(-)

diff --git a/src/pinefarm/cli/autogen.py b/src/pinefarm/cli/autogen.py
index b6caf11..466b75a 100644
--- a/src/pinefarm/cli/autogen.py
+++ b/src/pinefarm/cli/autogen.py
@@ -1,11 +1,13 @@
 """Autogenerate pinecards from NNPDF metadata"""
 
 import click
+import rich
 
 from .. import configs
 from ..external.nnlojet import generate_pinecard_from_nnpdf
 from ._base import command
 
+
 @command.command("autogen")
 @click.argument("dataset", nargs=1)
 @click.option(
@@ -20,6 +22,6 @@ def runcards(dataset, target):
     if target == "NNLOJET":
         output_runcards = generate_pinecard_from_nnpdf(dataset, output_path=output)
 
-    print("Runcards written to: ")
-    print("\n".join(str(i) for i in output_runcards))
-    print("metadata.txt might be empty or incomplete, please modifiy it manually")
+    rich.print("Runcards written to: ")
+    rich.print("    " + "\n".join(str(i) for i in output_runcards))
+    rich.print("metadata.txt might be empty or incomplete, please modify it manually")
diff --git a/src/pinefarm/cli/install.py b/src/pinefarm/cli/install.py
index 30f7572..3efebc4 100644
--- a/src/pinefarm/cli/install.py
+++ b/src/pinefarm/cli/install.py
@@ -41,6 +41,7 @@ def lhapdf():
     install.update_environ()
     install.lhapdf()
 
+
 @subcommand.command()
 def nnlojet():
     """Install NNLOJET."""
diff --git a/src/pinefarm/cli/run.py b/src/pinefarm/cli/run.py
index 905832d..f2be800 100644
--- a/src/pinefarm/cli/run.py
+++ b/src/pinefarm/cli/run.py
@@ -1,6 +1,7 @@
 """Compute a dataset and compare using a given PDF."""
 
 import pathlib
+import sys
 import time
 
 import click
@@ -16,7 +17,8 @@
 @click.argument("dataset")
 @click.argument("theory-path", type=click.Path(exists=True))
 @click.option("--pdf", default="NNPDF31_nlo_as_0118_luxqed")
-def subcommand(dataset, theory_path, pdf):
+@click.option("--dry", is_flag=True, help="Don't execute the underlying code")
+def subcommand(dataset, theory_path, pdf, dry):
     """Compute a dataset and compare using a given PDF.
 
     Given a DATASET name and a THEORY-PATH, a runcard is executed with the
@@ -25,16 +27,6 @@ def subcommand(dataset, theory_path, pdf):
     The given PDF (default: `NNPDF31_nlo_as_0118_luxqed`) will be used to
     compare original results with PineAPPL interpolation.
 
-    """
-    # read theory card from file
-    with open(theory_path) as f:
-        theory_card = yaml.safe_load(f)
-    main(dataset, theory_card, pdf)
-
-
-def main(dataset, theory, pdf):
-    """Compute a dataset and compare using a given PDF.
-
     Parameters
     ----------
     dataset : str
@@ -44,7 +36,12 @@ def main(dataset, theory, pdf):
     pdf : str
         pdf name
 
+
     """
+    # read theory card from file
+    with open(theory_path) as f:
+        theory_card = yaml.safe_load(f)
+
     dataset = pathlib.Path(dataset).name
     timestamp = None
 
@@ -64,11 +61,23 @@ def main(dataset, theory, pdf):
     except UnboundLocalError as e:
         raise UnboundLocalError(f"Runcard {dataset} could not be found") from e
 
-
     rich.print(f"Computing [{datainfo.color}]{dataset}[/]...")
-    runner = datainfo.external(dataset, theory, pdf, timestamp=timestamp)
+    runner = datainfo.external(dataset, theory_card, pdf, timestamp=timestamp)
 
     install_reqs(runner, pdf)
+
+    # Run the preparation step of the runner (if any)
+    runner_stop = runner.preparation()
+    if dry or runner_stop:
+        rich.print(
+            f"""Running in dry mode, exiting now.
+The preparation step can be found in:
+    {runner.dest}"""
+        )
+        sys.exit(0)
+
+    ###### <this part will eventually go to -prepare->
+
     run_dataset(runner)
 
 
diff --git a/src/pinefarm/external/__init__.py b/src/pinefarm/external/__init__.py
index 1af29e5..69e8db0 100644
--- a/src/pinefarm/external/__init__.py
+++ b/src/pinefarm/external/__init__.py
@@ -29,8 +29,9 @@ def decide_external_tool(dsname):
     # or a prefix in the pinecard
 
     if dsname.startswith("NNLOJET"):
-        from .external import NNLOJET
-        return NNLOJET.NNLOJET, "blue"
+        from .nnlojet import NNLOJET
+
+        return NNLOJET, "blue"
 
     # DIS with yadism
     if (configs["paths"]["runcards"] / dsname / "observable.yaml").exists():
diff --git a/src/pinefarm/external/interface.py b/src/pinefarm/external/interface.py
index 1c7b85b..cd754d6 100644
--- a/src/pinefarm/external/interface.py
+++ b/src/pinefarm/external/interface.py
@@ -39,9 +39,6 @@ def __init__(self, name, theory, pdf, timestamp=None):
         if timestamp is None:
             self.dest = tools.create_output_folder(self.name, self.theory["ID"])
         else:
-            import ipdb
-
-            ipdb.set_trace()
             self.dest = configs.configs["paths"]["results"] / (
                 str(theory["ID"]) + "-" + self.name + "-" + self.timestamp
             )
@@ -71,6 +68,10 @@ def update_with_tmp(self):
     def install():
         """Install all needed programs."""
 
+    def preparation(self):
+        """Run the preparation step; return True to stop before the actual run."""
+        return False
+
     @abc.abstractmethod
     def run(self):
         """Execute the program."""
diff --git a/src/pinefarm/external/nnlojet/nnpdf_interface.py b/src/pinefarm/external/nnlojet/nnpdf_interface.py
index 6335104..1f9b031 100755
--- a/src/pinefarm/external/nnlojet/nnpdf_interface.py
+++ b/src/pinefarm/external/nnlojet/nnpdf_interface.py
@@ -16,10 +16,6 @@
 
 import numpy as np
 from ruamel.yaml import YAML, CommentedMap
-from validphys.api import API
-from validphys.datafiles import path_vpdata
-from validphys.theorydbutils import fetch_theory
-from validphys.filters import KIN_LABEL
 
 # set-up the yaml reader
 yaml = YAML(pure=True)
@@ -31,6 +27,8 @@
 
 def _legacy_nnpdf_translation(df, proc_type):
     """When reading variables with k1/k2/k3 tries to figure out to which variables it corresponds"""
+    from validphys.filters import KIN_LABEL
+
     new_vars = list(KIN_LABEL[proc_type])
     # Reorganize a bit the names to avoid extra problems
     if "M_ll" in new_vars:
@@ -223,6 +221,8 @@ def _generate_nnlojet_pinecard(runname, process, energy, experiment, histograms)
 def generate_pinecard_from_nnpdf(nnpdf_dataset, scale="mz", output_path="."):
     """Generate a NNLOJET pinecard from an NNPDF dataset"""
     # Load the NNPDF dataset
+    from validphys.api import API
+
     commondata = API.commondata(dataset_input={"dataset": nnpdf_dataset})
     metadata = commondata.metadata
     kin_df = metadata.load_kinematics(drop_minmax=False)
diff --git a/src/pinefarm/external/nnlojet/runcardgen.py b/src/pinefarm/external/nnlojet/runcardgen.py
index 9dbb656..7d6b1cf 100755
--- a/src/pinefarm/external/nnlojet/runcardgen.py
+++ b/src/pinefarm/external/nnlojet/runcardgen.py
@@ -305,7 +305,9 @@ def generate_runcard(
     return runcard_path
 
 
-def generate_nnlojet_runcard(yamlinfo, channels=("LO",), output=Path(".")):
+def generate_nnlojet_runcard(
+    yamlinfo, channels=("LO",), output=Path("."), warmup=False
+):
     """Generate a nnlojet runcard from a yaml pinecard"""
     yaml_metadata = YamlLOJET(**yamlinfo)
 
@@ -313,5 +315,8 @@ def generate_nnlojet_runcard(yamlinfo, channels=("LO",), output=Path(".")):
 
     runcards = []
     for channel in channels:
-        runcards.append(generate_runcard(yaml_metadata, channel, output=output))
+        runcard_path = generate_runcard(
+            yaml_metadata, channel, output=output, is_warmup=warmup
+        )
+        runcards.append(runcard_path)
     return runcards
diff --git a/src/pinefarm/external/nnlojet/runner.py b/src/pinefarm/external/nnlojet/runner.py
index 19c348e..1d0fa0f 100644
--- a/src/pinefarm/external/nnlojet/runner.py
+++ b/src/pinefarm/external/nnlojet/runner.py
@@ -1,8 +1,5 @@
 """Provides a runner for NNLOJET."""
 
-import subprocess as sp
-import sys
-
 from yaml import safe_load
 
 from .. import interface
@@ -17,29 +14,31 @@ def __init__(self, pinecard, theorycard, *args, **kwargs):
 
         pinecard = pinecard.replace("NNLOJET_", "")
         yaml_card = (self.source / pinecard).with_suffix(".yaml")
-        yaml_dict = safe_load(yaml_card.open("r"))
+        # Save the yaml dictionary from the NNLOJET pinecard
+        self._yaml_dict = safe_load(yaml_card.open("r"))
 
+    def preparation(self):
+        """Generate the NNLOJET warmup and production runcards in ``self.dest``."""
         # Update the yaml card according to the theory
-        params = yaml_dict["parameters"]
+        params = self._yaml_dict["parameters"]
 
-        ckm_first = float(theorycard.get("CKM", "1.0").split()[0])
+        ckm_first = float(self.theory.get("CKM", "1.0").split()[0])
         if ckm_first != 1.0:
             params["CKM"] = "FULL"
 
         translate = [("MZ", "MASS[Z]"), ("MW", "MASS[W]")]
         for nnpdf_key, nnlojet_key in translate:
-            if nnpdf_key in theorycard:
-                params[nnlojet_key] = theorycard[nnpdf_key]
+            if nnpdf_key in self.theory:
+                params[nnlojet_key] = self.theory[nnpdf_key]
 
         # Autodiscover scale if possible
-        if "scales" in yaml_dict:
-            scdict = yaml_dict["scales"]
+        if (scdict := self._yaml_dict.get("scales")) is not None:
             for scale, key in scdict.items():
-                if isinstance(key, str) and key.upper() in theorycard:
-                    scdict[scale] = theorycard[key.upper()]
+                if isinstance(key, str) and key.upper() in self.theory:
+                    scdict[scale] = self.theory[key.upper()]
 
         # Select channels according to PTO
-        order = theorycard.get("PTO")
+        order = self.theory.get("PTO")
         channels = ["LO"]
         if order > 0:
             channels += ["R", "V"]
@@ -48,23 +47,16 @@ def __init__(self, pinecard, theorycard, *args, **kwargs):
         if order > 2:
             raise NotImplementedError("N3LO still not working")
 
-        self._nnlojet_runcards = generate_nnlojet_runcard(
-            yaml_dict, channels, output=self.dest
-        )
+        # Generate both the production and warmup runcards
+        for warmup in [True, False]:
+            _ = generate_nnlojet_runcard(
+                self._yaml_dict, channels, output=self.dest, warmup=warmup
+            )
+        return True
 
     def run(self):
         """Run the corresponding NNLOJET runcard"""
-        print(f"NNLOJET running not implemented, but you can find the NNLOJET runcards at:\n> {self.dest}")
-        sys.exit(-1)
-#         for runcard in self._nnlojet_runcards:
-#             # Exit regardless of whether there's a NNLOJET executable for now
-#             try:
-#                 sp.run(["NNLOJET", "-run", runcard.name], cwd=self.dest, check=True)
-#             except FileNotFoundError:
-#                 print(
-#                     f"NNLOJET executable not found, but you can find the NNLOJET runcards at:\n> {self.dest}"
-#                 )
-#                 sys.exit(-1)
+        raise NotImplementedError("NNLOJET running not implemented outside of dry mode")
 
     def collect_versions(self) -> dict:
         return {"nnlojet_version": "secret"}