From 1e21a96a28197bb8534cbf56015258dd71356351 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 22 Feb 2024 13:43:05 +0100 Subject: [PATCH 01/35] feat: add function for new lst-binning approach --- hera_opm/mf_tools.py | 174 ++++++++++++++++++++++++++----------------- 1 file changed, 104 insertions(+), 70 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index e67f8b0..9245e21 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1363,6 +1363,92 @@ def get_lstbin_datafiles(config, parent_dir): for df in datafiles ] +def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): + try: + from hera_cal.lst_stack import make_lst_bin_config_file + except ImportError: + from hera_cal.lstbin_simple import make_lst_bin_config_file + + # Get dlst. Updated version supports leaving dlst unspecified or set as null. + # To support older versions which required string 'None', set that to None here. + dlst = get_config_entry( + config, "LSTBIN_OPTS", "dlst", default=None, required=False + ) + if isinstance(dlst, str) and dlst.lower() in ("none", "null", ""): + warnings.warn( + "dlst should not be set to (string) 'None', but rather left unspecified in your TOML.", + DeprecationWarning, + ) + dlst = None + + lstbin_config_file = Path(outdir) / "file-config.yaml" + + clobber = get_config_entry(config, "LSTBIN_OPTS", "overwrite", default=False) + atol = get_config_entry(config, "LSTBIN_OPTS", "atol", default=1e-10) + lst_start = get_config_entry( + config, "LSTBIN_OPTS", "lst_start", default=None, required=False + ) + lst_width = get_config_entry( + config, "LSTBIN_OPTS", "lst_width", default=2 * math.pi + ) + ntimes_per_file = get_config_entry( + config, "LSTBIN_OPTS", "ntimes_per_file", default=60 + ) + blts_are_rectangular = get_config_entry( + config, "LSTBIN_OPTS", "blts_are_rectangular", default=None, required=False + ) + time_axis_faster_than_bls = get_config_entry( + config, + "LSTBIN_OPTS", + "time_axis_faster_than_bls", + default=None, + required=False, + ) + jd_regex = get_config_entry( + config, "LSTBIN_OPTS", "jd_regex", default=r"zen\.(\d+\.\d+)\." + ) + + file_config = make_lst_bin_config_file( + config_file=lstbin_config_file, + data_files=datafiles, + clobber=clobber, + dlst=dlst, + atol=atol, + lst_start=lst_start, + lst_width=lst_width, + ntimes_per_file=ntimes_per_file, + blts_are_rectangular=blts_are_rectangular, + time_axis_faster_than_bls=time_axis_faster_than_bls, + jd_regex=jd_regex, + ) + print(f"Created lstbin config file at {lstbin_config_file}.") + + return len(file_config['matched_files']) + +def make_lstbin_config_file(config, outdir: str) -> int: + # This must be a TOML file that specifies how to construct the LSTbin file-config + binning_config_file = get_config_entry( + config, + "LSTBIN_OPTS", + "binning-config", + required=True, + ) + + from hera_cal.lst_stack.config import LSTBinConfiguration + + lstconfig = LSTBinConfiguration.from_toml(binning_config_file) + matched_files = lstconfig.get_matched_files() + lst_file_config = lstconfig.create_config(matched_files) + + lstbin_config_file = Path(outdir) / "file-config.h5" + + lst_file_config.write(lstbin_config_file) + + return len(lst_file_config.matched_files) + + + + def build_lstbin_makeflow_from_config( config_file, mf_name=None, work_dir=None, **kwargs @@ -1391,7 +1477,6 @@ def build_lstbin_makeflow_from_config( """ # import hera_cal - from hera_cal import lst_stack as lstbin # read in config file config = toml.load(config_file) @@ -1437,10 +1522,7 @@ def build_lstbin_makeflow_from_config( get_config_entry(config, "LSTBIN_OPTS", "parent_dir", required=True) ) - if work_dir is None: - work_dir = parent_dir - else: - work_dir = Path(work_dir) + work_dir = Path(work_dir or parent_dir) makeflowfile = work_dir / fn @@ -1468,76 +1550,28 @@ def build_lstbin_makeflow_from_config( ) print("export BATCH_OPTIONS = {}".format(batch_options), file=f) - datafiles = get_lstbin_datafiles(config, parent_dir) - - print("Searching for files in the following globs: ") - for df in datafiles: - print(" " + df.strip("'").strip('"')) - - # pre-process files to determine the number of output files - _datafiles = [sorted(glob.glob(df.strip("'").strip('"'))) for df in datafiles] - _datafiles = [df for df in _datafiles if len(df) > 0] - if "outdir" in kwargs: outdir = Path(kwargs["outdir"]) else: outdir = Path(get_config_entry(config, "LSTBIN_OPTS", "outdir")) - lstbin_config_file = Path(outdir) / "file-config.yaml" - - # Get dlst. Updated version supports leaving dlst unspecified or set as null. - # To support older versions which required string 'None', set that to None here. - dlst = get_config_entry( - config, "LSTBIN_OPTS", "dlst", default=None, required=False - ) - if isinstance(dlst, str) and dlst.lower() in ("none", "null", ""): - warnings.warn( - "dlst should not be set to (string) 'None', but rather left unspecified in your TOML.", - DeprecationWarning, - ) - dlst = None - - clobber = get_config_entry(config, "LSTBIN_OPTS", "overwrite", default=False) - atol = get_config_entry(config, "LSTBIN_OPTS", "atol", default=1e-10) - lst_start = get_config_entry( - config, "LSTBIN_OPTS", "lst_start", default=None, required=False - ) - lst_width = get_config_entry( - config, "LSTBIN_OPTS", "lst_width", default=2 * math.pi - ) - ntimes_per_file = get_config_entry( - config, "LSTBIN_OPTS", "ntimes_per_file", default=60 - ) - blts_are_rectangular = get_config_entry( - config, "LSTBIN_OPTS", "blts_are_rectangular", default=None, required=False - ) - time_axis_faster_than_bls = get_config_entry( - config, - "LSTBIN_OPTS", - "time_axis_faster_than_bls", - default=None, - required=False, - ) - jd_regex = get_config_entry( - config, "LSTBIN_OPTS", "jd_regex", default=r"zen\.(\d+\.\d+)\." - ) - - file_config = lstbin.make_lst_bin_config_file( - config_file=lstbin_config_file, - data_files=_datafiles, - clobber=clobber, - dlst=dlst, - atol=atol, - lst_start=lst_start, - lst_width=lst_width, - ntimes_per_file=ntimes_per_file, - blts_are_rectangular=blts_are_rectangular, - time_axis_faster_than_bls=time_axis_faster_than_bls, - jd_regex=jd_regex, - ) - print(f"Created lstbin config file at {lstbin_config_file}.") - - nfiles = len(file_config["matched_files"]) if parallelize else 1 + try: + nfiles = make_lstbin_config_file(config, outdir) + except ImportError: + datafiles = get_lstbin_datafiles(config, parent_dir) + + print("Searching for files in the following globs: ") + for df in datafiles: + print(" " + df.strip("'").strip('"')) + + # pre-process files to determine the number of output files + _datafiles = [sorted(glob.glob(df.strip("'").strip('"'))) for df in datafiles] + _datafiles = [df for df in _datafiles if len(df) > 0] + + nfiles = _legacy_make_lstbin_config_file(config, outdir) + + if not parallelize: + nfiles = 1 # loop over output files for output_file_index in range(nfiles): From 36f421d796b32d53ab776e1c2cff4f858866f647 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 7 Mar 2024 10:53:40 +0100 Subject: [PATCH 02/35] feat: new notebook-based lstbin workflow creator --- hera_opm/mf_tools.py | 219 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 183 insertions(+), 36 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 9245e21..bdc875e 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -694,6 +694,10 @@ def build_makeflow_from_config( build_lstbin_makeflow_from_config( config_file, mf_name=mf_name, work_dir=work_dir, **kwargs ) + elif makeflow_type == "lstbin-notebook": + build_lstbin_notebook_makeflow_from_config( + config_file, mf_name=mf_name, work_dir=work_dir, **kwargs + ) else: raise ValueError( "unknown makeflow_type {} specified; must be 'analysis' or 'lstbin'".format( @@ -703,6 +707,20 @@ def build_makeflow_from_config( return +def _get_timeout(config): + timeout = get_config_entry(config, "Options", "timeout", required=False) + if timeout is not None: + # check that the `timeout' command exists on the system + try: + subprocess.check_output(["timeout", "--help"]) + except OSError: # pragma: no cover + warnings.warn( + 'A value for the "timeout" option was specified,' + " but the `timeout' command does not appear to be" + " installed. Please install or remove the option" + " from the config file" + ) + return timeout def build_analysis_makeflow_from_config( obsids, config_file, mf_name=None, work_dir=None @@ -801,19 +819,7 @@ def build_analysis_makeflow_from_config( source_script = get_config_entry(config, "Options", "source_script", required=False) mail_user = get_config_entry(config, "Options", "mail_user", required=False) batch_system = get_config_entry(config, "Options", "batch_system", required=False) - timeout = get_config_entry(config, "Options", "timeout", required=False) - if timeout is not None: - # check that the `timeout' command exists on the system - try: - subprocess.check_output(["timeout", "--help"]) - except OSError: # pragma: no cover - warnings.warn( - 'A value for the "timeout" option was specified,' - " but the `timeout' command does not appear to be" - " installed. Please install or remove the option" - " from the config file" - ) - timeout = timeout + timeout = _get_timeout(config) # open file for writing cf = os.path.basename(config_file) @@ -1493,18 +1499,7 @@ def build_lstbin_makeflow_from_config( conda_env = get_config_entry(config, "Options", "conda_env", required=False) source_script = get_config_entry(config, "Options", "source_script", required=False) batch_system = get_config_entry(config, "Options", "batch_system", required=False) - timeout = get_config_entry(config, "Options", "timeout", required=False) - if timeout is not None: - # check that the `timeout' command exists on the system - try: - subprocess.check_output(["timeout", "--help"]) - except OSError: # pragma: no cover - warnings.warn( - 'A value for the "timeout" option was specified,' - " but the `timeout' command does not appear to be" - " installed. Please install or remove the option" - " from the config file" - ) + timeout = _get_timeout(config) # open file for writing if mf_name is not None: @@ -1555,20 +1550,17 @@ def build_lstbin_makeflow_from_config( else: outdir = Path(get_config_entry(config, "LSTBIN_OPTS", "outdir")) - try: - nfiles = make_lstbin_config_file(config, outdir) - except ImportError: - datafiles = get_lstbin_datafiles(config, parent_dir) + datafiles = get_lstbin_datafiles(config, parent_dir) - print("Searching for files in the following globs: ") - for df in datafiles: - print(" " + df.strip("'").strip('"')) + print("Searching for files in the following globs: ") + for df in datafiles: + print(" " + df.strip("'").strip('"')) - # pre-process files to determine the number of output files - _datafiles = [sorted(glob.glob(df.strip("'").strip('"'))) for df in datafiles] - _datafiles = [df for df in _datafiles if len(df) > 0] + # pre-process files to determine the number of output files + _datafiles = [sorted(glob.glob(df.strip("'").strip('"'))) for df in datafiles] + _datafiles = [df for df in _datafiles if len(df) > 0] - nfiles = _legacy_make_lstbin_config_file(config, outdir) + nfiles = _legacy_make_lstbin_config_file(config, outdir) if not parallelize: nfiles = 1 @@ -1645,6 +1637,161 @@ def build_lstbin_makeflow_from_config( return +def build_lstbin_notebook_makeflow_from_config( + config_file: str | Path, + mf_name: str | None=None, + work_dir: str | Path | None=None, + **kwargs +) -> None: + """Construct a notebook-based LST-binning makeflow file from input data and a config_file. + + This is used from H6C+ with hera_cal 4+. + + Parameters + ---------- + config_file : str + Full path to config file containing options. + mf_name : str + The name of makeflow file. Defaults to ".mf" if not + specified. + work_dir : str or Path, optional + The directory in which to write the makeflow file and wrapper files. + If not specified, the parent directory of the config file will be used. + """ + config_file = Path(config_file) + # read in config file + config = toml.load(config_file) + cf = config_file.name + + if mf_name is None: + mf_name = config_file.with_suffix(".mf").name + + work_dir = Path(work_dir or config_file.parent) + + makeflowfile = work_dir / mf_name + + # get LSTBIN arguments + lstbin_args = get_config_entry(config, "LSTBIN", "args", required=False) + + # set output_file_select to None + config["LSTBIN_OPTS"]["output_file_select"] = str("None") + config['LSTBIN_OPTS']['thisfile'] = str(config_file.absolute()) + + # get general options + path_to_do_scripts = Path(get_config_entry(config, "Options", "path_to_do_scripts")) + conda_env = get_config_entry(config, "Options", "conda_env", required=False) + source_script = get_config_entry(config, "Options", "source_script", required=False) + batch_system = get_config_entry(config, "Options", "batch_system", required=False) + timeout = _get_timeout(config) + + # determine whether or not to parallelize + parallelize = get_config_entry(config, "LSTBIN_OPTS", "parallelize", required=True) + + actions = get_config_entry(config, "WorkFlow", "actions", required=True) + if len(actions) > 1: + raise ValueError("This function only supports a single action in the workflow.") + if len(actions) == 0: + raise ValueError("No actions found in the workflow.") + action = actions[0] + + # define command + command = path_to_do_scripts / f"do_{action}.sh" + + # add resource information + base_mem = get_config_entry(config, "Options", "base_mem", required=True) + base_cpu = get_config_entry(config, "Options", "base_cpu", required=False) + mail_user = get_config_entry(config, "Options", "mail_user", required=False) + default_queue = get_config_entry( + config, "Options", "default_queue", required=False + ) + if default_queue is None: + default_queue = "hera" + batch_options = process_batch_options( + base_mem, base_cpu, mail_user, default_queue, batch_system + ) + + outdir = Path(get_config_entry(config, "LSTBIN_OPTS", "outdir")) + + # The new way in H6C+ (notebook interface) + nfiles = make_lstbin_config_file(config, outdir) + + if not parallelize: + nfiles = 1 + + source_script_line = f"source {source_script}" if source_script else "" + conda_env_line = f"conda activate {conda_env}" if conda_env else "" + cmd = f"{command} {{args}}" + cmdline = f"timeout {timeout} {cmdline}" if timeout is not None else cmd + + wrapper_template = f"""#!/bin/bash +{source_script_line} +{conda_env_line} +date +cd {work_dir} +{cmdline} +if [ $? -eq 0 ]; then + cd {work_dir} + touch {outfile} +else + mv {logfile} {logfile.parent / f"{logfile.name}.error"} +fi +date + """ + + # write makeflow file + with open(makeflowfile, "w") as fl: + # add comment at top of file listing date of creation and config file name + dt = time.strftime("%H:%M:%S on %d %B %Y") + fl.write( + f"""# makeflow file generated from config file {config_file.name} +# created at {dt} +export BATCH_OPTIONS = {batch_options} +""") + + # loop over output files + for output_file_index in range(nfiles): + # if parallize, update output_file_select + if parallelize: + config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) + + # make outfile list + outfile = Path(f"{output_file_index:04}.LSTBIN.out") + + # get args list for lst-binning step + args = [ + str(get_config_entry(config, "LSTBIN_OPTS", a, required=True)) + for a in lstbin_args + ] + # turn into string + args = " ".join(args) + + # make logfile name + # logfile will capture stdout and stderr + logfile = work_dir / outfile.with_suffix(".log").name + + # make a small wrapper script that will run the actual command + # can't embed if; then statements in makeflow script + wrapper_script = work_dir / f"wrapper_{outfile.with_suffix('.sh').name}" + + with open(wrapper_script, "w") as f2: + f2.write(wrapper_template.format(args=args)) + + # make file executable + os.chmod(wrapper_script, 0o755) + + # first line lists target file to make (dummy output file), and requirements + # second line is "build rule", which runs the shell script and makes the output file + lines = f"{outfile}: {command}\n\t{wrapper_script} > {logfile} 2>&1\n" + fl.write(lines) + + # Write the toml config to the output directory. + shutil.copy2(config_file, outdir / "lstbin-config.toml") + + # Also write the conda_env export to the LSTbin dir + if conda_env is not None: + os.system( + f"conda env export -n {conda_env} --file {outdir}/environment.yaml" + ) def clean_wrapper_scripts(work_dir): """Clean up wrapper scripts from work directory. From 3fa4ead761ab6acbcc0974e444b6251a389e52c3 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 7 Mar 2024 11:21:31 +0100 Subject: [PATCH 03/35] ux: more printing to help know how many files have beeen found --- hera_opm/mf_tools.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index bdc875e..e256b6b 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1443,6 +1443,11 @@ def make_lstbin_config_file(config, outdir: str) -> int: from hera_cal.lst_stack.config import LSTBinConfiguration lstconfig = LSTBinConfiguration.from_toml(binning_config_file) + print(f"Found {len(lstconfig.data_files)} nights of data.") + print("Each night has the following number of files:") + for flist in lstconfig.data_files: + print(f"{flist[0].parent.name}: {len(flist)}") + matched_files = lstconfig.get_matched_files() lst_file_config = lstconfig.create_config(matched_files) @@ -1641,7 +1646,6 @@ def build_lstbin_notebook_makeflow_from_config( config_file: str | Path, mf_name: str | None=None, work_dir: str | Path | None=None, - **kwargs ) -> None: """Construct a notebook-based LST-binning makeflow file from input data and a config_file. @@ -1661,7 +1665,6 @@ def build_lstbin_notebook_makeflow_from_config( config_file = Path(config_file) # read in config file config = toml.load(config_file) - cf = config_file.name if mf_name is None: mf_name = config_file.with_suffix(".mf").name From 295e8980f2d6d9ab9869293b36ec371843a6c5f4 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 7 Mar 2024 11:23:39 +0100 Subject: [PATCH 04/35] ux: more printing to help know how many files have beeen found --- hera_opm/mf_tools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index e256b6b..3acaef8 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1446,8 +1446,11 @@ def make_lstbin_config_file(config, outdir: str) -> int: print(f"Found {len(lstconfig.data_files)} nights of data.") print("Each night has the following number of files:") for flist in lstconfig.data_files: - print(f"{flist[0].parent.name}: {len(flist)}") + if len(flist) == 0: + continue + print(f"{flist[0].parent.name}: {len(flist)}") + matched_files = lstconfig.get_matched_files() lst_file_config = lstconfig.create_config(matched_files) From 95f9227882ce014631df7faa4f52c82a77768cb4 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 7 Mar 2024 13:29:48 +0100 Subject: [PATCH 05/35] fix: typo in cmd --- hera_opm/mf_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 3acaef8..c6f356d 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1448,7 +1448,7 @@ def make_lstbin_config_file(config, outdir: str) -> int: for flist in lstconfig.data_files: if len(flist) == 0: continue - + print(f"{flist[0].parent.name}: {len(flist)}") matched_files = lstconfig.get_matched_files() @@ -1727,7 +1727,7 @@ def build_lstbin_notebook_makeflow_from_config( source_script_line = f"source {source_script}" if source_script else "" conda_env_line = f"conda activate {conda_env}" if conda_env else "" cmd = f"{command} {{args}}" - cmdline = f"timeout {timeout} {cmdline}" if timeout is not None else cmd + cmdline = f"timeout {timeout} {cmd}" if timeout is not None else cmd wrapper_template = f"""#!/bin/bash {source_script_line} From f64c268bb71a8baeb942efc1cba4b0184f63a3ec Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 7 Mar 2024 13:41:21 +0100 Subject: [PATCH 06/35] early reference to outfile --- hera_opm/mf_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index c6f356d..1e12ffb 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1737,9 +1737,9 @@ def build_lstbin_notebook_makeflow_from_config( {cmdline} if [ $? -eq 0 ]; then cd {work_dir} - touch {outfile} + touch {{outfile}} else - mv {logfile} {logfile.parent / f"{logfile.name}.error"} + mv {{logfile}} {{logfile.parent / f"{logfile.name}.error"}} fi date """ @@ -1780,7 +1780,7 @@ def build_lstbin_notebook_makeflow_from_config( wrapper_script = work_dir / f"wrapper_{outfile.with_suffix('.sh').name}" with open(wrapper_script, "w") as f2: - f2.write(wrapper_template.format(args=args)) + f2.write(wrapper_template.format(args=args, outfile=outfile, logfile=logfile)) # make file executable os.chmod(wrapper_script, 0o755) From b64e5c2a54ce1fa0a77f5b1956110b6c0912a19b Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 7 Mar 2024 13:52:38 +0100 Subject: [PATCH 07/35] fix: bad ref to logfile --- hera_opm/mf_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 1e12ffb..a2b9147 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1739,7 +1739,7 @@ def build_lstbin_notebook_makeflow_from_config( cd {work_dir} touch {{outfile}} else - mv {{logfile}} {{logfile.parent / f"{logfile.name}.error"}} + mv {{logfile}} {{logfile}}.error fi date """ From 072cd42858ccea1eb884a692cc7eb0b9629baf1c Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 7 Mar 2024 14:15:15 +0100 Subject: [PATCH 08/35] fix: get correct opts for lstbin-notebook --- hera_opm/mf_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index a2b9147..88347dd 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1676,9 +1676,6 @@ def build_lstbin_notebook_makeflow_from_config( makeflowfile = work_dir / mf_name - # get LSTBIN arguments - lstbin_args = get_config_entry(config, "LSTBIN", "args", required=False) - # set output_file_select to None config["LSTBIN_OPTS"]["output_file_select"] = str("None") config['LSTBIN_OPTS']['thisfile'] = str(config_file.absolute()) @@ -1700,6 +1697,9 @@ def build_lstbin_notebook_makeflow_from_config( raise ValueError("No actions found in the workflow.") action = actions[0] + # get LSTBIN arguments + lstbin_args = get_config_entry(config, action, "args", required=False) + # define command command = path_to_do_scripts / f"do_{action}.sh" From d042cd7e0bcc040ec5921ad40aad3320a2b9dcab Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Wed, 27 Mar 2024 16:42:04 +0100 Subject: [PATCH 09/35] feat: write out YAML file for running papermill --- hera_opm/mf_tools.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 88347dd..003b77b 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -14,7 +14,7 @@ import toml from pathlib import Path import math - +import yaml def get_jd(filename): """Get the JD from a data file name. @@ -1790,9 +1790,16 @@ def build_lstbin_notebook_makeflow_from_config( lines = f"{outfile}: {command}\n\t{wrapper_script} > {logfile} 2>&1\n" fl.write(lines) + # Write the toml config to the output directory. shutil.copy2(config_file, outdir / "lstbin-config.toml") + # Also write a YAML version of just the parameters, to be used to run + # the notebook + cfg_opts = toml.load(config_file)['LSTAVG_OPTS'] + with open(outdir / "lstavg-config.yaml", "w") as fl: + yaml.dump(cfg_opts, fl) + # Also write the conda_env export to the LSTbin dir if conda_env is not None: os.system( From 3ba0a56355b9ac433950cb37957453dd2ea7854a Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Wed, 27 Mar 2024 19:39:06 +0100 Subject: [PATCH 10/35] fix: use toml not yaml --- hera_opm/mf_tools.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 003b77b..43850aa 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -14,7 +14,6 @@ import toml from pathlib import Path import math -import yaml def get_jd(filename): """Get the JD from a data file name. @@ -1676,9 +1675,21 @@ def build_lstbin_notebook_makeflow_from_config( makeflowfile = work_dir / mf_name + outdir = Path(get_config_entry(config, "LSTBIN_OPTS", "outdir")) + + # Write the toml config to the output directory. + shutil.copy2(config_file, outdir / "lstbin-config.toml") + + # Also write a YAML version of just the parameters, to be used to run + # the notebook + cfg_opts = toml.load(config_file)['LSTAVG_OPTS'] + yaml_file = outdir / "lstavg-config.toml" + with open(yaml_file, "w") as fl: + toml.dump(cfg_opts, fl) + # set output_file_select to None config["LSTBIN_OPTS"]["output_file_select"] = str("None") - config['LSTBIN_OPTS']['thisfile'] = str(config_file.absolute()) + config['LSTBIN_OPTS']['yamlfile'] = str(yaml_file.absolute()) # get general options path_to_do_scripts = Path(get_config_entry(config, "Options", "path_to_do_scripts")) @@ -1716,7 +1727,6 @@ def build_lstbin_notebook_makeflow_from_config( base_mem, base_cpu, mail_user, default_queue, batch_system ) - outdir = Path(get_config_entry(config, "LSTBIN_OPTS", "outdir")) # The new way in H6C+ (notebook interface) nfiles = make_lstbin_config_file(config, outdir) @@ -1791,14 +1801,6 @@ def build_lstbin_notebook_makeflow_from_config( fl.write(lines) - # Write the toml config to the output directory. - shutil.copy2(config_file, outdir / "lstbin-config.toml") - - # Also write a YAML version of just the parameters, to be used to run - # the notebook - cfg_opts = toml.load(config_file)['LSTAVG_OPTS'] - with open(outdir / "lstavg-config.yaml", "w") as fl: - yaml.dump(cfg_opts, fl) # Also write the conda_env export to the LSTbin dir if conda_env is not None: From a1267a72b83a815d0854d066e38849a9bbdfa418 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Wed, 27 Mar 2024 19:40:35 +0100 Subject: [PATCH 11/35] fix: use toml not yaml --- hera_opm/mf_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 43850aa..f47928e 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1683,13 +1683,13 @@ def build_lstbin_notebook_makeflow_from_config( # Also write a YAML version of just the parameters, to be used to run # the notebook cfg_opts = toml.load(config_file)['LSTAVG_OPTS'] - yaml_file = outdir / "lstavg-config.toml" - with open(yaml_file, "w") as fl: + lstavg_config = outdir / "lstavg-config.toml" + with open(lstavg_config, "w") as fl: toml.dump(cfg_opts, fl) # set output_file_select to None config["LSTBIN_OPTS"]["output_file_select"] = str("None") - config['LSTBIN_OPTS']['yamlfile'] = str(yaml_file.absolute()) + config['LSTBIN_OPTS']['lstavg_toml_file'] = str(lstavg_config.absolute()) # get general options path_to_do_scripts = Path(get_config_entry(config, "Options", "path_to_do_scripts")) From f2a16c84f4f11e5fba817c2289f8e04692846b3b Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 08:17:33 +0100 Subject: [PATCH 12/35] fix: use abs path to work_dir --- hera_opm/mf_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index f47928e..8f0f1d9 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1671,7 +1671,7 @@ def build_lstbin_notebook_makeflow_from_config( if mf_name is None: mf_name = config_file.with_suffix(".mf").name - work_dir = Path(work_dir or config_file.parent) + work_dir = Path(work_dir or config_file.parent).absolute() makeflowfile = work_dir / mf_name From 2d6d5381941cf134c48ec1e17c6bcb57cf400611 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 08:48:24 +0100 Subject: [PATCH 13/35] fix: add kernel to call to notebook --- hera_opm/mf_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 8f0f1d9..df686db 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1690,7 +1690,8 @@ def build_lstbin_notebook_makeflow_from_config( # set output_file_select to None config["LSTBIN_OPTS"]["output_file_select"] = str("None") config['LSTBIN_OPTS']['lstavg_toml_file'] = str(lstavg_config.absolute()) - + config['LSTBIN_OPTS']['kernel'] = str(get_config_entry(config, "LSTBIN_OPTS", "conda_env", required=True)) + # get general options path_to_do_scripts = Path(get_config_entry(config, "Options", "path_to_do_scripts")) conda_env = get_config_entry(config, "Options", "conda_env", required=False) From df7f48c7f91f2b17bce127983e7f4dec6e9dffe8 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 08:54:43 +0100 Subject: [PATCH 14/35] fix: make notebook task executable --- hera_opm/mf_tools.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index df686db..3e587d2 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1687,11 +1687,7 @@ def build_lstbin_notebook_makeflow_from_config( with open(lstavg_config, "w") as fl: toml.dump(cfg_opts, fl) - # set output_file_select to None - config["LSTBIN_OPTS"]["output_file_select"] = str("None") - config['LSTBIN_OPTS']['lstavg_toml_file'] = str(lstavg_config.absolute()) - config['LSTBIN_OPTS']['kernel'] = str(get_config_entry(config, "LSTBIN_OPTS", "conda_env", required=True)) - + # get general options path_to_do_scripts = Path(get_config_entry(config, "Options", "path_to_do_scripts")) conda_env = get_config_entry(config, "Options", "conda_env", required=False) @@ -1699,6 +1695,11 @@ def build_lstbin_notebook_makeflow_from_config( batch_system = get_config_entry(config, "Options", "batch_system", required=False) timeout = _get_timeout(config) + # set output_file_select to None + config["LSTBIN_OPTS"]["output_file_select"] = str("None") + config['LSTBIN_OPTS']['lstavg_toml_file'] = str(lstavg_config.absolute()) + config['LSTBIN_OPTS']['kernel'] = conda_env + # determine whether or not to parallelize parallelize = get_config_entry(config, "LSTBIN_OPTS", "parallelize", required=True) From 6c35bd5403f7ceed7906f36ede43356aa0e338fd Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 09:37:23 +0100 Subject: [PATCH 15/35] feat: add plot_every option for notebooks --- hera_opm/mf_tools.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 3e587d2..0a8ed6a 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1700,6 +1700,14 @@ def build_lstbin_notebook_makeflow_from_config( config['LSTBIN_OPTS']['lstavg_toml_file'] = str(lstavg_config.absolute()) config['LSTBIN_OPTS']['kernel'] = conda_env + if 'make_plots' not in config['LSTBIN_OPTS']: + if 'plot_every' in config["LSTBIN_OPTS"]: + plot_every = int(get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False)) + else: + plot_every = 1 + else: + plot_every = int(bool(config['LSTBIN_OPTS']['make_plots'])) + # determine whether or not to parallelize parallelize = get_config_entry(config, "LSTBIN_OPTS", "parallelize", required=True) @@ -1771,6 +1779,10 @@ def build_lstbin_notebook_makeflow_from_config( # if parallize, update output_file_select if parallelize: config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) + if plot_every > 0: + config["LSTBIN_OPTS"]['make_plots'] = str(output_file_index % plot_every == 0) + else: + config["LSTBIN_OPTS"]['make_plots'] = "False" # make outfile list outfile = Path(f"{output_file_index:04}.LSTBIN.out") From 2f65cfa45d5daebfba5aae74e535b14d39fa21c6 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 10:36:19 +0100 Subject: [PATCH 16/35] fix: add fileconf param --- hera_opm/mf_tools.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 0a8ed6a..c6a4cd2 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1457,7 +1457,7 @@ def make_lstbin_config_file(config, outdir: str) -> int: lst_file_config.write(lstbin_config_file) - return len(lst_file_config.matched_files) + return lstbin_config_file, len(lst_file_config.matched_files) @@ -1687,7 +1687,6 @@ def build_lstbin_notebook_makeflow_from_config( with open(lstavg_config, "w") as fl: toml.dump(cfg_opts, fl) - # get general options path_to_do_scripts = Path(get_config_entry(config, "Options", "path_to_do_scripts")) conda_env = get_config_entry(config, "Options", "conda_env", required=False) @@ -1737,10 +1736,9 @@ def build_lstbin_notebook_makeflow_from_config( base_mem, base_cpu, mail_user, default_queue, batch_system ) - - # The new way in H6C+ (notebook interface) - nfiles = make_lstbin_config_file(config, outdir) - + lstbin_config_file, nfiles = make_lstbin_config_file(config, outdir) + config['LSTBIN_OPTS']['lstconf'] = str(lstbin_config_file.absolute()) + if not parallelize: nfiles = 1 From f4a765a69fcf6b420a65eb5f02a32ed116be3d73 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 13:07:21 +0100 Subject: [PATCH 17/35] tmp: add print for debug --- hera_opm/mf_tools.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index c6a4cd2..16d2390 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1738,7 +1738,7 @@ def build_lstbin_notebook_makeflow_from_config( lstbin_config_file, nfiles = make_lstbin_config_file(config, outdir) config['LSTBIN_OPTS']['lstconf'] = str(lstbin_config_file.absolute()) - + if not parallelize: nfiles = 1 @@ -1777,8 +1777,10 @@ def build_lstbin_notebook_makeflow_from_config( # if parallize, update output_file_select if parallelize: config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) + print("PLOT_EVERY: ", plot_every) if plot_every > 0: config["LSTBIN_OPTS"]['make_plots'] = str(output_file_index % plot_every == 0) + print(config['LSTBIN_OPTS']['make_plots']) else: config["LSTBIN_OPTS"]['make_plots'] = "False" From 3e1063b9a798b990c9d55ad7affe5c7a9c5af654 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 13:13:28 +0100 Subject: [PATCH 18/35] tmp: add print for debug --- hera_opm/mf_tools.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 16d2390..8686656 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1700,8 +1700,11 @@ def build_lstbin_notebook_makeflow_from_config( config['LSTBIN_OPTS']['kernel'] = conda_env if 'make_plots' not in config['LSTBIN_OPTS']: + print('not in there') if 'plot_every' in config["LSTBIN_OPTS"]: + print('plot every is...') plot_every = int(get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False)) + print(plot_every) else: plot_every = 1 else: From d37c90fb344ccaab64c5515c488e3c6c4517363c Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 14:49:36 +0100 Subject: [PATCH 19/35] remove prints --- hera_opm/mf_tools.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 8686656..5a2674e 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1700,11 +1700,8 @@ def build_lstbin_notebook_makeflow_from_config( config['LSTBIN_OPTS']['kernel'] = conda_env if 'make_plots' not in config['LSTBIN_OPTS']: - print('not in there') if 'plot_every' in config["LSTBIN_OPTS"]: - print('plot every is...') plot_every = int(get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False)) - print(plot_every) else: plot_every = 1 else: @@ -1780,10 +1777,8 @@ def build_lstbin_notebook_makeflow_from_config( # if parallize, update output_file_select if parallelize: config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) - print("PLOT_EVERY: ", plot_every) if plot_every > 0: config["LSTBIN_OPTS"]['make_plots'] = str(output_file_index % plot_every == 0) - print(config['LSTBIN_OPTS']['make_plots']) else: config["LSTBIN_OPTS"]['make_plots'] = "False" From ebdf212e59d746acaad121ccb0fc77487f4d783e Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 14:57:56 +0100 Subject: [PATCH 20/35] fix: add future annotations --- hera_opm/mf_tools.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 5a2674e..5452a87 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -2,6 +2,7 @@ # Copyright (c) 2018 The HERA Collaboration # Licensed under the 2-clause BSD License """Module for converting a config file into a makeflow script.""" +from __future__ import annotations import os import re From 637049fab644f7f2028e989b209a2bfde052e72b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:05:07 +0000 Subject: [PATCH 21/35] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- hera_opm/mf_tools.py | 75 +++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 5452a87..242551b 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -16,6 +16,7 @@ from pathlib import Path import math + def get_jd(filename): """Get the JD from a data file name. @@ -707,6 +708,7 @@ def build_makeflow_from_config( return + def _get_timeout(config): timeout = get_config_entry(config, "Options", "timeout", required=False) if timeout is not None: @@ -722,6 +724,7 @@ def _get_timeout(config): ) return timeout + def build_analysis_makeflow_from_config( obsids, config_file, mf_name=None, work_dir=None ): @@ -1369,6 +1372,7 @@ def get_lstbin_datafiles(config, parent_dir): for df in datafiles ] + def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): try: from hera_cal.lst_stack import make_lst_bin_config_file @@ -1377,9 +1381,7 @@ def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): # Get dlst. Updated version supports leaving dlst unspecified or set as null. # To support older versions which required string 'None', set that to None here. - dlst = get_config_entry( - config, "LSTBIN_OPTS", "dlst", default=None, required=False - ) + dlst = get_config_entry(config, "LSTBIN_OPTS", "dlst", default=None, required=False) if isinstance(dlst, str) and dlst.lower() in ("none", "null", ""): warnings.warn( "dlst should not be set to (string) 'None', but rather left unspecified in your TOML.", @@ -1429,7 +1431,8 @@ def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): ) print(f"Created lstbin config file at {lstbin_config_file}.") - return len(file_config['matched_files']) + return len(file_config["matched_files"]) + def make_lstbin_config_file(config, outdir: str) -> int: # This must be a TOML file that specifies how to construct the LSTbin file-config @@ -1461,9 +1464,6 @@ def make_lstbin_config_file(config, outdir: str) -> int: return lstbin_config_file, len(lst_file_config.matched_files) - - - def build_lstbin_makeflow_from_config( config_file, mf_name=None, work_dir=None, **kwargs ): @@ -1569,7 +1569,7 @@ def build_lstbin_makeflow_from_config( _datafiles = [df for df in _datafiles if len(df) > 0] nfiles = _legacy_make_lstbin_config_file(config, outdir) - + if not parallelize: nfiles = 1 @@ -1645,10 +1645,11 @@ def build_lstbin_makeflow_from_config( return + def build_lstbin_notebook_makeflow_from_config( - config_file: str | Path, - mf_name: str | None=None, - work_dir: str | Path | None=None, + config_file: str | Path, + mf_name: str | None = None, + work_dir: str | Path | None = None, ) -> None: """Construct a notebook-based LST-binning makeflow file from input data and a config_file. @@ -1662,7 +1663,7 @@ def build_lstbin_notebook_makeflow_from_config( The name of makeflow file. Defaults to ".mf" if not specified. work_dir : str or Path, optional - The directory in which to write the makeflow file and wrapper files. + The directory in which to write the makeflow file and wrapper files. If not specified, the parent directory of the config file will be used. """ config_file = Path(config_file) @@ -1671,7 +1672,7 @@ def build_lstbin_notebook_makeflow_from_config( if mf_name is None: mf_name = config_file.with_suffix(".mf").name - + work_dir = Path(work_dir or config_file.parent).absolute() makeflowfile = work_dir / mf_name @@ -1681,9 +1682,9 @@ def build_lstbin_notebook_makeflow_from_config( # Write the toml config to the output directory. shutil.copy2(config_file, outdir / "lstbin-config.toml") - # Also write a YAML version of just the parameters, to be used to run + # Also write a YAML version of just the parameters, to be used to run # the notebook - cfg_opts = toml.load(config_file)['LSTAVG_OPTS'] + cfg_opts = toml.load(config_file)["LSTAVG_OPTS"] lstavg_config = outdir / "lstavg-config.toml" with open(lstavg_config, "w") as fl: toml.dump(cfg_opts, fl) @@ -1697,20 +1698,22 @@ def build_lstbin_notebook_makeflow_from_config( # set output_file_select to None config["LSTBIN_OPTS"]["output_file_select"] = str("None") - config['LSTBIN_OPTS']['lstavg_toml_file'] = str(lstavg_config.absolute()) - config['LSTBIN_OPTS']['kernel'] = conda_env + config["LSTBIN_OPTS"]["lstavg_toml_file"] = str(lstavg_config.absolute()) + config["LSTBIN_OPTS"]["kernel"] = conda_env - if 'make_plots' not in config['LSTBIN_OPTS']: - if 'plot_every' in config["LSTBIN_OPTS"]: - plot_every = int(get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False)) + if "make_plots" not in config["LSTBIN_OPTS"]: + if "plot_every" in config["LSTBIN_OPTS"]: + plot_every = int( + get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False) + ) else: plot_every = 1 else: - plot_every = int(bool(config['LSTBIN_OPTS']['make_plots'])) + plot_every = int(bool(config["LSTBIN_OPTS"]["make_plots"])) # determine whether or not to parallelize parallelize = get_config_entry(config, "LSTBIN_OPTS", "parallelize", required=True) - + actions = get_config_entry(config, "WorkFlow", "actions", required=True) if len(actions) > 1: raise ValueError("This function only supports a single action in the workflow.") @@ -1728,9 +1731,7 @@ def build_lstbin_notebook_makeflow_from_config( base_mem = get_config_entry(config, "Options", "base_mem", required=True) base_cpu = get_config_entry(config, "Options", "base_cpu", required=False) mail_user = get_config_entry(config, "Options", "mail_user", required=False) - default_queue = get_config_entry( - config, "Options", "default_queue", required=False - ) + default_queue = get_config_entry(config, "Options", "default_queue", required=False) if default_queue is None: default_queue = "hera" batch_options = process_batch_options( @@ -1738,7 +1739,7 @@ def build_lstbin_notebook_makeflow_from_config( ) lstbin_config_file, nfiles = make_lstbin_config_file(config, outdir) - config['LSTBIN_OPTS']['lstconf'] = str(lstbin_config_file.absolute()) + config["LSTBIN_OPTS"]["lstconf"] = str(lstbin_config_file.absolute()) if not parallelize: nfiles = 1 @@ -1770,18 +1771,21 @@ def build_lstbin_notebook_makeflow_from_config( fl.write( f"""# makeflow file generated from config file {config_file.name} # created at {dt} -export BATCH_OPTIONS = {batch_options} -""") - +export BATCH_OPTIONS = {batch_options} +""" + ) + # loop over output files for output_file_index in range(nfiles): # if parallize, update output_file_select if parallelize: config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) if plot_every > 0: - config["LSTBIN_OPTS"]['make_plots'] = str(output_file_index % plot_every == 0) + config["LSTBIN_OPTS"]["make_plots"] = str( + output_file_index % plot_every == 0 + ) else: - config["LSTBIN_OPTS"]['make_plots'] = "False" + config["LSTBIN_OPTS"]["make_plots"] = "False" # make outfile list outfile = Path(f"{output_file_index:04}.LSTBIN.out") @@ -1801,9 +1805,11 @@ def build_lstbin_notebook_makeflow_from_config( # make a small wrapper script that will run the actual command # can't embed if; then statements in makeflow script wrapper_script = work_dir / f"wrapper_{outfile.with_suffix('.sh').name}" - + with open(wrapper_script, "w") as f2: - f2.write(wrapper_template.format(args=args, outfile=outfile, logfile=logfile)) + f2.write( + wrapper_template.format(args=args, outfile=outfile, logfile=logfile) + ) # make file executable os.chmod(wrapper_script, 0o755) @@ -1813,14 +1819,13 @@ def build_lstbin_notebook_makeflow_from_config( lines = f"{outfile}: {command}\n\t{wrapper_script} > {logfile} 2>&1\n" fl.write(lines) - - # Also write the conda_env export to the LSTbin dir if conda_env is not None: os.system( f"conda env export -n {conda_env} --file {outdir}/environment.yaml" ) + def clean_wrapper_scripts(work_dir): """Clean up wrapper scripts from work directory. From eba792dfc794d85eb786aaadd026bd4788f99008 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 28 Mar 2024 15:35:39 +0100 Subject: [PATCH 22/35] fix: use section of config file instead of external file --- hera_opm/mf_tools.py | 86 ++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 48 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 242551b..7275b40 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -16,7 +16,6 @@ from pathlib import Path import math - def get_jd(filename): """Get the JD from a data file name. @@ -708,7 +707,6 @@ def build_makeflow_from_config( return - def _get_timeout(config): timeout = get_config_entry(config, "Options", "timeout", required=False) if timeout is not None: @@ -724,7 +722,6 @@ def _get_timeout(config): ) return timeout - def build_analysis_makeflow_from_config( obsids, config_file, mf_name=None, work_dir=None ): @@ -1372,7 +1369,6 @@ def get_lstbin_datafiles(config, parent_dir): for df in datafiles ] - def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): try: from hera_cal.lst_stack import make_lst_bin_config_file @@ -1381,7 +1377,9 @@ def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): # Get dlst. Updated version supports leaving dlst unspecified or set as null. # To support older versions which required string 'None', set that to None here. - dlst = get_config_entry(config, "LSTBIN_OPTS", "dlst", default=None, required=False) + dlst = get_config_entry( + config, "LSTBIN_OPTS", "dlst", default=None, required=False + ) if isinstance(dlst, str) and dlst.lower() in ("none", "null", ""): warnings.warn( "dlst should not be set to (string) 'None', but rather left unspecified in your TOML.", @@ -1431,21 +1429,15 @@ def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): ) print(f"Created lstbin config file at {lstbin_config_file}.") - return len(file_config["matched_files"]) - + return len(file_config['matched_files']) def make_lstbin_config_file(config, outdir: str) -> int: # This must be a TOML file that specifies how to construct the LSTbin file-config - binning_config_file = get_config_entry( - config, - "LSTBIN_OPTS", - "binning-config", - required=True, - ) + lstconfig = config['FILE_CFG'] from hera_cal.lst_stack.config import LSTBinConfiguration - lstconfig = LSTBinConfiguration.from_toml(binning_config_file) + lstconfig = LSTBinConfiguration.from_toml(toml.dump(lstconfig)) print(f"Found {len(lstconfig.data_files)} nights of data.") print("Each night has the following number of files:") for flist in lstconfig.data_files: @@ -1464,6 +1456,9 @@ def make_lstbin_config_file(config, outdir: str) -> int: return lstbin_config_file, len(lst_file_config.matched_files) + + + def build_lstbin_makeflow_from_config( config_file, mf_name=None, work_dir=None, **kwargs ): @@ -1568,8 +1563,8 @@ def build_lstbin_makeflow_from_config( _datafiles = [sorted(glob.glob(df.strip("'").strip('"'))) for df in datafiles] _datafiles = [df for df in _datafiles if len(df) > 0] - nfiles = _legacy_make_lstbin_config_file(config, outdir) - + nfiles = _legacy_make_lstbin_config_file(config, outdir, _datafiles) + if not parallelize: nfiles = 1 @@ -1645,11 +1640,10 @@ def build_lstbin_makeflow_from_config( return - def build_lstbin_notebook_makeflow_from_config( - config_file: str | Path, - mf_name: str | None = None, - work_dir: str | Path | None = None, + config_file: str | Path, + mf_name: str | None=None, + work_dir: str | Path | None=None, ) -> None: """Construct a notebook-based LST-binning makeflow file from input data and a config_file. @@ -1663,7 +1657,7 @@ def build_lstbin_notebook_makeflow_from_config( The name of makeflow file. Defaults to ".mf" if not specified. work_dir : str or Path, optional - The directory in which to write the makeflow file and wrapper files. + The directory in which to write the makeflow file and wrapper files. If not specified, the parent directory of the config file will be used. """ config_file = Path(config_file) @@ -1672,7 +1666,7 @@ def build_lstbin_notebook_makeflow_from_config( if mf_name is None: mf_name = config_file.with_suffix(".mf").name - + work_dir = Path(work_dir or config_file.parent).absolute() makeflowfile = work_dir / mf_name @@ -1682,9 +1676,9 @@ def build_lstbin_notebook_makeflow_from_config( # Write the toml config to the output directory. shutil.copy2(config_file, outdir / "lstbin-config.toml") - # Also write a YAML version of just the parameters, to be used to run + # Also write a YAML version of just the parameters, to be used to run # the notebook - cfg_opts = toml.load(config_file)["LSTAVG_OPTS"] + cfg_opts = toml.load(config_file)['LSTAVG_OPTS'] lstavg_config = outdir / "lstavg-config.toml" with open(lstavg_config, "w") as fl: toml.dump(cfg_opts, fl) @@ -1698,22 +1692,20 @@ def build_lstbin_notebook_makeflow_from_config( # set output_file_select to None config["LSTBIN_OPTS"]["output_file_select"] = str("None") - config["LSTBIN_OPTS"]["lstavg_toml_file"] = str(lstavg_config.absolute()) - config["LSTBIN_OPTS"]["kernel"] = conda_env + config['LSTBIN_OPTS']['lstavg_toml_file'] = str(lstavg_config.absolute()) + config['LSTBIN_OPTS']['kernel'] = conda_env - if "make_plots" not in config["LSTBIN_OPTS"]: - if "plot_every" in config["LSTBIN_OPTS"]: - plot_every = int( - get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False) - ) + if 'make_plots' not in config['LSTBIN_OPTS']: + if 'plot_every' in config["LSTBIN_OPTS"]: + plot_every = int(get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False)) else: plot_every = 1 else: - plot_every = int(bool(config["LSTBIN_OPTS"]["make_plots"])) + plot_every = int(bool(config['LSTBIN_OPTS']['make_plots'])) # determine whether or not to parallelize parallelize = get_config_entry(config, "LSTBIN_OPTS", "parallelize", required=True) - + actions = get_config_entry(config, "WorkFlow", "actions", required=True) if len(actions) > 1: raise ValueError("This function only supports a single action in the workflow.") @@ -1731,7 +1723,9 @@ def build_lstbin_notebook_makeflow_from_config( base_mem = get_config_entry(config, "Options", "base_mem", required=True) base_cpu = get_config_entry(config, "Options", "base_cpu", required=False) mail_user = get_config_entry(config, "Options", "mail_user", required=False) - default_queue = get_config_entry(config, "Options", "default_queue", required=False) + default_queue = get_config_entry( + config, "Options", "default_queue", required=False + ) if default_queue is None: default_queue = "hera" batch_options = process_batch_options( @@ -1739,7 +1733,7 @@ def build_lstbin_notebook_makeflow_from_config( ) lstbin_config_file, nfiles = make_lstbin_config_file(config, outdir) - config["LSTBIN_OPTS"]["lstconf"] = str(lstbin_config_file.absolute()) + config['LSTBIN_OPTS']['lstconf'] = str(lstbin_config_file.absolute()) if not parallelize: nfiles = 1 @@ -1771,21 +1765,18 @@ def build_lstbin_notebook_makeflow_from_config( fl.write( f"""# makeflow file generated from config file {config_file.name} # created at {dt} -export BATCH_OPTIONS = {batch_options} -""" - ) - +export BATCH_OPTIONS = {batch_options} +""") + # loop over output files for output_file_index in range(nfiles): # if parallize, update output_file_select if parallelize: config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) if plot_every > 0: - config["LSTBIN_OPTS"]["make_plots"] = str( - output_file_index % plot_every == 0 - ) + config["LSTBIN_OPTS"]['make_plots'] = str(output_file_index % plot_every == 0) else: - config["LSTBIN_OPTS"]["make_plots"] = "False" + config["LSTBIN_OPTS"]['make_plots'] = "False" # make outfile list outfile = Path(f"{output_file_index:04}.LSTBIN.out") @@ -1805,11 +1796,9 @@ def build_lstbin_notebook_makeflow_from_config( # make a small wrapper script that will run the actual command # can't embed if; then statements in makeflow script wrapper_script = work_dir / f"wrapper_{outfile.with_suffix('.sh').name}" - + with open(wrapper_script, "w") as f2: - f2.write( - wrapper_template.format(args=args, outfile=outfile, logfile=logfile) - ) + f2.write(wrapper_template.format(args=args, outfile=outfile, logfile=logfile)) # make file executable os.chmod(wrapper_script, 0o755) @@ -1819,13 +1808,14 @@ def build_lstbin_notebook_makeflow_from_config( lines = f"{outfile}: {command}\n\t{wrapper_script} > {logfile} 2>&1\n" fl.write(lines) + + # Also write the conda_env export to the LSTbin dir if conda_env is not None: os.system( f"conda env export -n {conda_env} --file {outdir}/environment.yaml" ) - def clean_wrapper_scripts(work_dir): """Clean up wrapper scripts from work directory. From 10168fe4f3ec35ac56ff394d2272e498a028e96b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:46:54 +0000 Subject: [PATCH 23/35] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- hera_opm/mf_tools.py | 77 +++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 7275b40..71a1b85 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -16,6 +16,7 @@ from pathlib import Path import math + def get_jd(filename): """Get the JD from a data file name. @@ -707,6 +708,7 @@ def build_makeflow_from_config( return + def _get_timeout(config): timeout = get_config_entry(config, "Options", "timeout", required=False) if timeout is not None: @@ -722,6 +724,7 @@ def _get_timeout(config): ) return timeout + def build_analysis_makeflow_from_config( obsids, config_file, mf_name=None, work_dir=None ): @@ -1369,6 +1372,7 @@ def get_lstbin_datafiles(config, parent_dir): for df in datafiles ] + def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): try: from hera_cal.lst_stack import make_lst_bin_config_file @@ -1377,9 +1381,7 @@ def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): # Get dlst. Updated version supports leaving dlst unspecified or set as null. # To support older versions which required string 'None', set that to None here. - dlst = get_config_entry( - config, "LSTBIN_OPTS", "dlst", default=None, required=False - ) + dlst = get_config_entry(config, "LSTBIN_OPTS", "dlst", default=None, required=False) if isinstance(dlst, str) and dlst.lower() in ("none", "null", ""): warnings.warn( "dlst should not be set to (string) 'None', but rather left unspecified in your TOML.", @@ -1429,11 +1431,12 @@ def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): ) print(f"Created lstbin config file at {lstbin_config_file}.") - return len(file_config['matched_files']) + return len(file_config["matched_files"]) + def make_lstbin_config_file(config, outdir: str) -> int: # This must be a TOML file that specifies how to construct the LSTbin file-config - lstconfig = config['FILE_CFG'] + lstconfig = config["FILE_CFG"] from hera_cal.lst_stack.config import LSTBinConfiguration @@ -1456,9 +1459,6 @@ def make_lstbin_config_file(config, outdir: str) -> int: return lstbin_config_file, len(lst_file_config.matched_files) - - - def build_lstbin_makeflow_from_config( config_file, mf_name=None, work_dir=None, **kwargs ): @@ -1564,7 +1564,7 @@ def build_lstbin_makeflow_from_config( _datafiles = [df for df in _datafiles if len(df) > 0] nfiles = _legacy_make_lstbin_config_file(config, outdir, _datafiles) - + if not parallelize: nfiles = 1 @@ -1640,10 +1640,11 @@ def build_lstbin_makeflow_from_config( return + def build_lstbin_notebook_makeflow_from_config( - config_file: str | Path, - mf_name: str | None=None, - work_dir: str | Path | None=None, + config_file: str | Path, + mf_name: str | None = None, + work_dir: str | Path | None = None, ) -> None: """Construct a notebook-based LST-binning makeflow file from input data and a config_file. @@ -1657,7 +1658,7 @@ def build_lstbin_notebook_makeflow_from_config( The name of makeflow file. Defaults to ".mf" if not specified. work_dir : str or Path, optional - The directory in which to write the makeflow file and wrapper files. + The directory in which to write the makeflow file and wrapper files. If not specified, the parent directory of the config file will be used. """ config_file = Path(config_file) @@ -1666,7 +1667,7 @@ def build_lstbin_notebook_makeflow_from_config( if mf_name is None: mf_name = config_file.with_suffix(".mf").name - + work_dir = Path(work_dir or config_file.parent).absolute() makeflowfile = work_dir / mf_name @@ -1676,9 +1677,9 @@ def build_lstbin_notebook_makeflow_from_config( # Write the toml config to the output directory. shutil.copy2(config_file, outdir / "lstbin-config.toml") - # Also write a YAML version of just the parameters, to be used to run + # Also write a YAML version of just the parameters, to be used to run # the notebook - cfg_opts = toml.load(config_file)['LSTAVG_OPTS'] + cfg_opts = toml.load(config_file)["LSTAVG_OPTS"] lstavg_config = outdir / "lstavg-config.toml" with open(lstavg_config, "w") as fl: toml.dump(cfg_opts, fl) @@ -1692,20 +1693,22 @@ def build_lstbin_notebook_makeflow_from_config( # set output_file_select to None config["LSTBIN_OPTS"]["output_file_select"] = str("None") - config['LSTBIN_OPTS']['lstavg_toml_file'] = str(lstavg_config.absolute()) - config['LSTBIN_OPTS']['kernel'] = conda_env + config["LSTBIN_OPTS"]["lstavg_toml_file"] = str(lstavg_config.absolute()) + config["LSTBIN_OPTS"]["kernel"] = conda_env - if 'make_plots' not in config['LSTBIN_OPTS']: - if 'plot_every' in config["LSTBIN_OPTS"]: - plot_every = int(get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False)) + if "make_plots" not in config["LSTBIN_OPTS"]: + if "plot_every" in config["LSTBIN_OPTS"]: + plot_every = int( + get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False) + ) else: plot_every = 1 else: - plot_every = int(bool(config['LSTBIN_OPTS']['make_plots'])) + plot_every = int(bool(config["LSTBIN_OPTS"]["make_plots"])) # determine whether or not to parallelize parallelize = get_config_entry(config, "LSTBIN_OPTS", "parallelize", required=True) - + actions = get_config_entry(config, "WorkFlow", "actions", required=True) if len(actions) > 1: raise ValueError("This function only supports a single action in the workflow.") @@ -1723,9 +1726,7 @@ def build_lstbin_notebook_makeflow_from_config( base_mem = get_config_entry(config, "Options", "base_mem", required=True) base_cpu = get_config_entry(config, "Options", "base_cpu", required=False) mail_user = get_config_entry(config, "Options", "mail_user", required=False) - default_queue = get_config_entry( - config, "Options", "default_queue", required=False - ) + default_queue = get_config_entry(config, "Options", "default_queue", required=False) if default_queue is None: default_queue = "hera" batch_options = process_batch_options( @@ -1733,7 +1734,7 @@ def build_lstbin_notebook_makeflow_from_config( ) lstbin_config_file, nfiles = make_lstbin_config_file(config, outdir) - config['LSTBIN_OPTS']['lstconf'] = str(lstbin_config_file.absolute()) + config["LSTBIN_OPTS"]["lstconf"] = str(lstbin_config_file.absolute()) if not parallelize: nfiles = 1 @@ -1765,18 +1766,21 @@ def build_lstbin_notebook_makeflow_from_config( fl.write( f"""# makeflow file generated from config file {config_file.name} # created at {dt} -export BATCH_OPTIONS = {batch_options} -""") - +export BATCH_OPTIONS = {batch_options} +""" + ) + # loop over output files for output_file_index in range(nfiles): # if parallize, update output_file_select if parallelize: config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) if plot_every > 0: - config["LSTBIN_OPTS"]['make_plots'] = str(output_file_index % plot_every == 0) + config["LSTBIN_OPTS"]["make_plots"] = str( + output_file_index % plot_every == 0 + ) else: - config["LSTBIN_OPTS"]['make_plots'] = "False" + config["LSTBIN_OPTS"]["make_plots"] = "False" # make outfile list outfile = Path(f"{output_file_index:04}.LSTBIN.out") @@ -1796,9 +1800,11 @@ def build_lstbin_notebook_makeflow_from_config( # make a small wrapper script that will run the actual command # can't embed if; then statements in makeflow script wrapper_script = work_dir / f"wrapper_{outfile.with_suffix('.sh').name}" - + with open(wrapper_script, "w") as f2: - f2.write(wrapper_template.format(args=args, outfile=outfile, logfile=logfile)) + f2.write( + wrapper_template.format(args=args, outfile=outfile, logfile=logfile) + ) # make file executable os.chmod(wrapper_script, 0o755) @@ -1808,14 +1814,13 @@ def build_lstbin_notebook_makeflow_from_config( lines = f"{outfile}: {command}\n\t{wrapper_script} > {logfile} 2>&1\n" fl.write(lines) - - # Also write the conda_env export to the LSTbin dir if conda_env is not None: os.system( f"conda env export -n {conda_env} --file {outdir}/environment.yaml" ) + def clean_wrapper_scripts(work_dir): """Clean up wrapper scripts from work directory. From df7e68f55620d4006f20effa589961fffea9b643 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Fri, 29 Mar 2024 10:51:09 +0100 Subject: [PATCH 24/35] fix: make outdir if it doesn't exist --- hera_opm/mf_tools.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 71a1b85..ea621a2 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1675,6 +1675,9 @@ def build_lstbin_notebook_makeflow_from_config( outdir = Path(get_config_entry(config, "LSTBIN_OPTS", "outdir")) # Write the toml config to the output directory. + if not outdir.exists(): + outdir.mkdir() + shutil.copy2(config_file, outdir / "lstbin-config.toml") # Also write a YAML version of just the parameters, to be used to run From f9f1b7ec96a9b90aed37fa0630fa642f86e54bc5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Mar 2024 09:51:13 +0000 Subject: [PATCH 25/35] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- hera_opm/mf_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index ea621a2..d33aff9 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1677,7 +1677,7 @@ def build_lstbin_notebook_makeflow_from_config( # Write the toml config to the output directory. if not outdir.exists(): outdir.mkdir() - + shutil.copy2(config_file, outdir / "lstbin-config.toml") # Also write a YAML version of just the parameters, to be used to run From 9ae5ca53b9b883063d0c68125415211f09c920f2 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Fri, 29 Mar 2024 10:52:26 +0100 Subject: [PATCH 26/35] style: run pre-commit --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0e59e3e..b9461bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,6 @@ requires = ["setuptools", "wheel", "toml>=0.9.4", "setuptools_scm[toml]>=6.2"] [tool.black] line-length = 88 -py36 = false exclude = ''' /( \.eggs From 3e6625baa133921a8ed6669111fac277f63c3d1c Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Fri, 29 Mar 2024 10:54:27 +0100 Subject: [PATCH 27/35] fix: use toml.dumps instead of toml.dump --- hera_opm/mf_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index d33aff9..5cb1775 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1440,7 +1440,7 @@ def make_lstbin_config_file(config, outdir: str) -> int: from hera_cal.lst_stack.config import LSTBinConfiguration - lstconfig = LSTBinConfiguration.from_toml(toml.dump(lstconfig)) + lstconfig = LSTBinConfiguration.from_toml(toml.dumps(lstconfig)) print(f"Found {len(lstconfig.data_files)} nights of data.") print("Each night has the following number of files:") for flist in lstconfig.data_files: From 28729c3abdc6240d6ae7adbab17b8ed04eb5da1f Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 4 Apr 2024 12:47:38 +0200 Subject: [PATCH 28/35] test: cover the lstbin makeflow better --- hera_opm/data/sample_config/lstbin.toml | 30 -- .../data/sample_config/lstbin_options.toml | 30 -- .../data/sample_config/lstbin_simple.toml | 33 -- hera_opm/data/sample_config/lstbin_v2.toml | 32 -- hera_opm/mf_tools.py | 310 +----------------- hera_opm/tests/test_lstbin_makeflow.py | 271 +++++++++++++++ hera_opm/tests/test_mf_tools.py | 201 ------------ setup.cfg | 2 +- 8 files changed, 280 insertions(+), 629 deletions(-) delete mode 100644 hera_opm/data/sample_config/lstbin.toml delete mode 100644 hera_opm/data/sample_config/lstbin_options.toml delete mode 100644 hera_opm/data/sample_config/lstbin_simple.toml delete mode 100644 hera_opm/data/sample_config/lstbin_v2.toml create mode 100644 hera_opm/tests/test_lstbin_makeflow.py diff --git a/hera_opm/data/sample_config/lstbin.toml b/hera_opm/data/sample_config/lstbin.toml deleted file mode 100644 index 317fa13..0000000 --- a/hera_opm/data/sample_config/lstbin.toml +++ /dev/null @@ -1,30 +0,0 @@ -[Options] -makeflow_type = "lstbin" -path_to_do_scripts = "~/hera/hera_opm/hera_opm/data/sample_task_scripts" -source_script = "~/.bashrc" -conda_env = "hera" -base_mem = 10000 -base_cpu = 1 - -[LSTBIN_OPTS] -sig_clip = true -sigma = 5 -min_N = 5 -rephase = false -ntimes_per_file = 60 -dlst = "None" -lst_start = 0.0 -fixed_lst_start = false -vis_units = "Jy" -parallelize = true -file_ext = "grp1.of2.{}.{}.{:7.5f}.uvh5" -outdir = "../data" -parent_dir = "../data" -data_files = ["'zen.2458043.*.HH.uvh5'", - "'zen.2458044.*.HH.uvh5'", - "'zen.2458045.*.HH.uvh5'"] - -[LSTBIN] -args = ["sig_clip", "sigma", "min_N", "rephase", "ntimes_per_file", "lst_start", - "fixed_lst_start", "dlst", "vis_units", "output_file_select", "file_ext", - "outdir"] diff --git a/hera_opm/data/sample_config/lstbin_options.toml b/hera_opm/data/sample_config/lstbin_options.toml deleted file mode 100644 index 2f50e91..0000000 --- a/hera_opm/data/sample_config/lstbin_options.toml +++ /dev/null @@ -1,30 +0,0 @@ -[Options] -makeflow_type = "lstbin" -path_to_do_scripts = "~/hera/hera_opm/hera_opm/data/sample_task_scripts" -conda_env = "hera" -base_mem = 10000 -base_cpu = 1 -timeout = "24h" - -[LSTBIN_OPTS] -sig_clip = true -sigma = 5 -min_N = 5 -rephase = false -ntimes_per_file = 60 -dlst = "None" -lst_start = 0.0 -fixed_lst_start = false -vis_units = "Jy" -parallelize = true -file_ext = "grp1.of2.{}.{}.{:7.5f}.uvh5" -outdir = "./" -parent_dir = "./" -data_files = ["'zen.2458043.*.HH.uvh5'", - "'zen.2458044.*.HH.uvh5'", - "'zen.2458045.*.HH.uvh5'"] - -[LSTBIN] -args = ["sig_clip", "sigma", "min_N", "rephase", "ntimes_per_file", "lst_start", - "fixed_lst_start", "dlst", "vis_units", "output_file_select", "file_ext", - "outdir"] diff --git a/hera_opm/data/sample_config/lstbin_simple.toml b/hera_opm/data/sample_config/lstbin_simple.toml deleted file mode 100644 index dfe5526..0000000 --- a/hera_opm/data/sample_config/lstbin_simple.toml +++ /dev/null @@ -1,33 +0,0 @@ -[Options] -makeflow_type = "lstbin" -path_to_do_scripts = "~/hera/hera_opm/hera_opm/data/sample_task_scripts" -source_script = "~/.bashrc" -conda_env = "hera" -base_mem = 10000 -base_cpu = 1 - -[LSTBIN_OPTS] -sig_clip = true -sigma = 5 -min_N = 5 -rephase = false -ntimes_per_file = 60 -dlst = "None" -lst_start = 0.0 -fixed_lst_start = false -vis_units = "Jy" -parallelize = true -file_ext = "grp1.of2.{}.{}.{:7.5f}.uvh5" -outdir = "../data" -parent_dir = "../data" - -datadir = "placeholder" -nightdirs = ["2458043", "2458044", "2458045"] -sd="HH" -label="" -extension="uvh5" - -[LSTBIN] -args = ["sig_clip", "sigma", "min_N", "rephase", "ntimes_per_file", "lst_start", - "fixed_lst_start", "dlst", "vis_units", "output_file_select", "file_ext", - "outdir"] diff --git a/hera_opm/data/sample_config/lstbin_v2.toml b/hera_opm/data/sample_config/lstbin_v2.toml deleted file mode 100644 index c73692e..0000000 --- a/hera_opm/data/sample_config/lstbin_v2.toml +++ /dev/null @@ -1,32 +0,0 @@ -# IDR2 v2 examle config -[Options] -makeflow_type = "lstbin" -path_to_do_scripts = "~/hera/hera_opm/hera_opm/data/sample_task_scripts" -source_script = "~/.bashrc" -conda_env = "hera" -base_mem = 10000 -base_cpu = 1 - -[LSTBIN_OPTS] -sig_clip = true -Nbls_to_load = "None" -sigma = 5 -min_N = 5 -rephase = false -ntimes_per_file = 60 -dlst = "None" -lst_start = 0.0 -fixed_lst_start = false -vis_units = "Jy" -parallelize = true -file_ext = "grp1.of2.{type}.{time:7.5f}.uvcA" -outdir = "../data" -parent_dir = "../data" -data_files = ["'zen.2458043.40141.HH.uvh5'", - "'zen.2458043.40887.HH.uvh5'"] -calibration = "" - -[LSTBIN] -args = ["sig_clip", "sigma", "min_N", "rephase", "ntimes_per_file", "lst_start", - "fixed_lst_start", "dlst", "vis_units", "output_file_select", "file_ext", - "outdir", "Nbls_to_load", "calibration"] diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 5cb1775..f37931f 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -11,10 +11,8 @@ import shutil import subprocess import warnings -import glob import toml from pathlib import Path -import math def get_jd(filename): @@ -680,7 +678,7 @@ def build_makeflow_from_config( "lstbin" type, and call the appropriate funciton below. """ - if isinstance(config_file, str): + if isinstance(config_file, (str, Path)): # read in config file config = toml.load(config_file) else: @@ -695,15 +693,10 @@ def build_makeflow_from_config( build_lstbin_makeflow_from_config( config_file, mf_name=mf_name, work_dir=work_dir, **kwargs ) - elif makeflow_type == "lstbin-notebook": - build_lstbin_notebook_makeflow_from_config( - config_file, mf_name=mf_name, work_dir=work_dir, **kwargs - ) else: raise ValueError( - "unknown makeflow_type {} specified; must be 'analysis' or 'lstbin'".format( - makeflow_type - ) + f"unknown makeflow_type '{makeflow_type}' specified; " + "must be 'analysis' or 'lstbin'" ) return @@ -1343,104 +1336,14 @@ def build_analysis_makeflow_from_config( return -def get_lstbin_datafiles(config, parent_dir): - """Determine the datafiles for use in LST-binning makeflow.""" - # get data files - datafiles = get_config_entry(config, "LSTBIN_OPTS", "data_files", required=False) - - if datafiles is None: - # These are only required if datafiles wasn't specified specifically. - datadir = get_config_entry(config, "LSTBIN_OPTS", "datadir", required=True) - nightdirs = get_config_entry(config, "LSTBIN_OPTS", "nightdirs", required=True) - extension = get_config_entry(config, "LSTBIN_OPTS", "extension", required=True) - label = get_config_entry(config, "LSTBIN_OPTS", "label", required=True) - sd = get_config_entry(config, "LSTBIN_OPTS", "sd", required=True) - jdglob = get_config_entry( - config, "LSTBIN_OPTS", "jdglob", required=False, default="*" - ) - - if label: - label += "." - - datafiles = [] - for nd in nightdirs: - datafiles.append(f"{datadir}/{nd}/zen.{jdglob}.{sd}.{label}{extension}") - - # encapsulate in double quotes - return [ - "'{}'".format('"{}"'.format(os.path.join(parent_dir, df.strip('"').strip("'")))) - for df in datafiles - ] - - -def _legacy_make_lstbin_config_file(config, outdir: Path, datafiles): - try: - from hera_cal.lst_stack import make_lst_bin_config_file - except ImportError: - from hera_cal.lstbin_simple import make_lst_bin_config_file - - # Get dlst. Updated version supports leaving dlst unspecified or set as null. - # To support older versions which required string 'None', set that to None here. - dlst = get_config_entry(config, "LSTBIN_OPTS", "dlst", default=None, required=False) - if isinstance(dlst, str) and dlst.lower() in ("none", "null", ""): - warnings.warn( - "dlst should not be set to (string) 'None', but rather left unspecified in your TOML.", - DeprecationWarning, - ) - dlst = None - - lstbin_config_file = Path(outdir) / "file-config.yaml" - - clobber = get_config_entry(config, "LSTBIN_OPTS", "overwrite", default=False) - atol = get_config_entry(config, "LSTBIN_OPTS", "atol", default=1e-10) - lst_start = get_config_entry( - config, "LSTBIN_OPTS", "lst_start", default=None, required=False - ) - lst_width = get_config_entry( - config, "LSTBIN_OPTS", "lst_width", default=2 * math.pi - ) - ntimes_per_file = get_config_entry( - config, "LSTBIN_OPTS", "ntimes_per_file", default=60 - ) - blts_are_rectangular = get_config_entry( - config, "LSTBIN_OPTS", "blts_are_rectangular", default=None, required=False - ) - time_axis_faster_than_bls = get_config_entry( - config, - "LSTBIN_OPTS", - "time_axis_faster_than_bls", - default=None, - required=False, - ) - jd_regex = get_config_entry( - config, "LSTBIN_OPTS", "jd_regex", default=r"zen\.(\d+\.\d+)\." - ) - - file_config = make_lst_bin_config_file( - config_file=lstbin_config_file, - data_files=datafiles, - clobber=clobber, - dlst=dlst, - atol=atol, - lst_start=lst_start, - lst_width=lst_width, - ntimes_per_file=ntimes_per_file, - blts_are_rectangular=blts_are_rectangular, - time_axis_faster_than_bls=time_axis_faster_than_bls, - jd_regex=jd_regex, - ) - print(f"Created lstbin config file at {lstbin_config_file}.") - - return len(file_config["matched_files"]) - - def make_lstbin_config_file(config, outdir: str) -> int: # This must be a TOML file that specifies how to construct the LSTbin file-config lstconfig = config["FILE_CFG"] - from hera_cal.lst_stack.config import LSTBinConfiguration + from hera_cal.lst_stack.config import LSTBinConfigurator - lstconfig = LSTBinConfiguration.from_toml(toml.dumps(lstconfig)) + print(lstconfig) + lstconfig = LSTBinConfigurator.from_toml(toml.dumps(lstconfig)) print(f"Found {len(lstconfig.data_files)} nights of data.") print("Each night has the following number of files:") for flist in lstconfig.data_files: @@ -1460,191 +1363,10 @@ def make_lstbin_config_file(config, outdir: str) -> int: def build_lstbin_makeflow_from_config( - config_file, mf_name=None, work_dir=None, **kwargs -): - """Construct an LST-binning makeflow file from input data and a config_file. - - Parameters - ---------- - config_file : str - Full path to config file containing options. - mf_name : str - The name of makeflow file. Defaults to ".mf" if not - specified. - - Returns - ------- - None - - - Notes - ----- - The major difference between this function and the one above is the use of - the `config_lst_bin_files` function from hera_cal, which is used to - determine the number of output files, which are parallelized over in the - makeflow. - - """ - # import hera_cal - - # read in config file - config = toml.load(config_file) - cf = os.path.basename(config_file) - - # get LSTBIN arguments - lstbin_args = get_config_entry(config, "LSTBIN", "args", required=False) - - # set output_file_select to None - config["LSTBIN_OPTS"]["output_file_select"] = str("None") - - # get general options - path_to_do_scripts = Path(get_config_entry(config, "Options", "path_to_do_scripts")) - conda_env = get_config_entry(config, "Options", "conda_env", required=False) - source_script = get_config_entry(config, "Options", "source_script", required=False) - batch_system = get_config_entry(config, "Options", "batch_system", required=False) - timeout = _get_timeout(config) - - # open file for writing - if mf_name is not None: - fn = mf_name - else: - base, ext = os.path.splitext(cf) - fn = "{0}.mf".format(base) - - # determine whether or not to parallelize - parallelize = get_config_entry(config, "LSTBIN_OPTS", "parallelize", required=True) - if "parent_dir" in kwargs: - parent_dir = Path(kwargs["parent_dir"]) - else: - parent_dir = Path( - get_config_entry(config, "LSTBIN_OPTS", "parent_dir", required=True) - ) - - work_dir = Path(work_dir or parent_dir) - - makeflowfile = work_dir / fn - - # define command - command = path_to_do_scripts / "do_LSTBIN.sh" - - # write makeflow file - with open(makeflowfile, "w") as f: - # add comment at top of file listing date of creation and config file name - dt = time.strftime("%H:%M:%S on %d %B %Y") - print("# makeflow file generated from config file {}".format(cf), file=f) - print("# created at {}".format(dt), file=f) - - # add resource information - base_mem = get_config_entry(config, "Options", "base_mem", required=True) - base_cpu = get_config_entry(config, "Options", "base_cpu", required=False) - mail_user = get_config_entry(config, "Options", "mail_user", required=False) - default_queue = get_config_entry( - config, "Options", "default_queue", required=False - ) - if default_queue is None: - default_queue = "hera" - batch_options = process_batch_options( - base_mem, base_cpu, mail_user, default_queue, batch_system - ) - print("export BATCH_OPTIONS = {}".format(batch_options), file=f) - - if "outdir" in kwargs: - outdir = Path(kwargs["outdir"]) - else: - outdir = Path(get_config_entry(config, "LSTBIN_OPTS", "outdir")) - - datafiles = get_lstbin_datafiles(config, parent_dir) - - print("Searching for files in the following globs: ") - for df in datafiles: - print(" " + df.strip("'").strip('"')) - - # pre-process files to determine the number of output files - _datafiles = [sorted(glob.glob(df.strip("'").strip('"'))) for df in datafiles] - _datafiles = [df for df in _datafiles if len(df) > 0] - - nfiles = _legacy_make_lstbin_config_file(config, outdir, _datafiles) - - if not parallelize: - nfiles = 1 - - # loop over output files - for output_file_index in range(nfiles): - # if parallize, update output_file_select - if parallelize: - config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) - - # make outfile list - outfile = Path(f"lstbin_outfile_{output_file_index}.LSTBIN.out") - - # get args list for lst-binning step - args = [ - str(get_config_entry(config, "LSTBIN_OPTS", a, required=True)) - for a in lstbin_args - ] - # turn into string - args = " ".join(args) - - # make logfile name - # logfile will capture stdout and stderr - logfile = work_dir / outfile.with_suffix(".log").name - - # make a small wrapper script that will run the actual command - # can't embed if; then statements in makeflow script - wrapper_script = work_dir / f"wrapper_{outfile.with_suffix('.sh').name}" - with open(wrapper_script, "w") as f2: - print("#!/bin/bash", file=f2) - if source_script is not None: - print("source {}".format(source_script), file=f2) - if conda_env is not None: - print("conda activate {}".format(conda_env), file=f2) - print("date", file=f2) - print("cd {}".format(parent_dir), file=f2) - if timeout is not None: - print( - "timeout {0} {1} {2}".format(timeout, command, args), - file=f2, - ) - else: - print("{0} {1}".format(command, args), file=f2) - print("if [ $? -eq 0 ]; then", file=f2) - print(" cd {}".format(work_dir), file=f2) - print(" touch {}".format(outfile), file=f2) - print("else", file=f2) - print( - " mv {0} {1}".format( - logfile, logfile.parent / f"{logfile.name}.error" - ), - file=f2, - ) - print("fi", file=f2) - print("date", file=f2) - # make file executable - os.chmod(wrapper_script, 0o755) - - # first line lists target file to make (dummy output file), and requirements - # second line is "build rule", which runs the shell script and makes the output file - line1 = "{0}: {1}".format(outfile, command) - line2 = "\t{0} > {1} 2>&1\n".format(wrapper_script, logfile) - print(line1, file=f) - print(line2, file=f) - - # Write the toml config to the output directory. - shutil.copy2(config_file, outdir / "lstbin-config.toml") - - # Also write the conda_env export to the LSTbin dir - if conda_env is not None: - os.system( - f"conda env export -n {conda_env} --file {outdir}/environment.yaml" - ) - - return - - -def build_lstbin_notebook_makeflow_from_config( config_file: str | Path, mf_name: str | None = None, work_dir: str | Path | None = None, + outdir: str | Path | None = None, ) -> None: """Construct a notebook-based LST-binning makeflow file from input data and a config_file. @@ -1672,7 +1394,7 @@ def build_lstbin_notebook_makeflow_from_config( makeflowfile = work_dir / mf_name - outdir = Path(get_config_entry(config, "LSTBIN_OPTS", "outdir")) + outdir = Path(outdir or get_config_entry(config, "LSTBIN_OPTS", "outdir")) # Write the toml config to the output directory. if not outdir.exists(): @@ -1699,16 +1421,6 @@ def build_lstbin_notebook_makeflow_from_config( config["LSTBIN_OPTS"]["lstavg_toml_file"] = str(lstavg_config.absolute()) config["LSTBIN_OPTS"]["kernel"] = conda_env - if "make_plots" not in config["LSTBIN_OPTS"]: - if "plot_every" in config["LSTBIN_OPTS"]: - plot_every = int( - get_config_entry(config, "LSTBIN_OPTS", "plot_every", required=False) - ) - else: - plot_every = 1 - else: - plot_every = int(bool(config["LSTBIN_OPTS"]["make_plots"])) - # determine whether or not to parallelize parallelize = get_config_entry(config, "LSTBIN_OPTS", "parallelize", required=True) @@ -1778,12 +1490,6 @@ def build_lstbin_notebook_makeflow_from_config( # if parallize, update output_file_select if parallelize: config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index) - if plot_every > 0: - config["LSTBIN_OPTS"]["make_plots"] = str( - output_file_index % plot_every == 0 - ) - else: - config["LSTBIN_OPTS"]["make_plots"] = "False" # make outfile list outfile = Path(f"{output_file_index:04}.LSTBIN.out") diff --git a/hera_opm/tests/test_lstbin_makeflow.py b/hera_opm/tests/test_lstbin_makeflow.py new file mode 100644 index 0000000..85e7335 --- /dev/null +++ b/hera_opm/tests/test_lstbin_makeflow.py @@ -0,0 +1,271 @@ +from pathlib import Path +import toml +from ..data import DATA_PATH +import pytest +from hera_opm import mf_tools as mt +import shutil + +hera_cal = pytest.importorskip("hera_cal") + + +def make_lstbin_config_file( + fl: Path, + datafiles: dict | list, + notebook: bool = False, + options=None, + lstbin_opts=None, + file_cfg=None, + lstavg_opts=None, +): + """Make a lstbin config file.""" + options = { + **{ + "makeflow_type": "lstbin", + "path_to_do_scripts": "/an/unused/path/for/tests", + "source_script": "~/.bashrc", + "conda_env": "hera", + "base_mem": 10000, + "base_cpu": 1, + }, + **(options or {}), + } + + lstbin_opts = { + **{ + "parallelize": True, + "outdir": str(fl.parent), + "parent_dir": str(fl.parent), + }, + **(lstbin_opts or {}), + } + + file_cfg = { + **{ + "nlsts_per_file": 60, + "lst_start": 0.0, + "datafiles": datafiles, + }, + **(file_cfg or {}), + } + + if isinstance(datafiles, list): + file_cfg["datadir"] = str(DATA_PATH) + + lstavg_opts = { + **{ + "outdir": "../data", + "bl_chunk_size": 5000, + "fname_format": "{inpaint_mode}/zen.{kind}.{lst:7.5f}.sum.uvh5", + "overwrite": True, + "write_med_mad": True, + "rephase": False, + }, + **(lstavg_opts or {}), + } + + if notebook: + action = "PER_OUTFILE_LSTSTACK_METRICS_NOTEBOOK" + args = [ + "outdir", + "lstconf", + "lstavg_toml_file", + "output_file_select", + "kernel", + ] + else: + action = "LSTBIN" + args = [ + "lstconf", + "lstavg_toml_file", + "output_file_select", + ] + + tomldict = { + "Options": options, + "LSTBIN_OPTS": lstbin_opts, + "FILE_CFG": file_cfg, + "LSTAVG_OPTS": lstavg_opts, + "WorkFlow": {"actions": [action]}, + action: {"args": args}, + } + + with open(fl, "w") as fl: + toml.dump(tomldict, fl) + + +@pytest.fixture(scope="module") +def lsttoml_direct_datafiles(tmp_path_factory) -> Path: + """Make a direct lstbin config file.""" + fl = tmp_path_factory.mktemp("data") / "lstbin_direct.toml" + make_lstbin_config_file( + fl, datafiles=["zen.2458043.40141.HH.uvh5", "zen.2458043.40887.HH.uvh5"] + ) + return fl + + +@pytest.fixture(scope="module") +def lsttoml_direct_datafiles_glob(tmp_path_factory) -> Path: + """Make a direct lstbin config file.""" + fl = tmp_path_factory.mktemp("data") / "lstbin_direct.toml" + make_lstbin_config_file( + fl, + datafiles=[ + "zen.2458043.*.HH.uvh5", + "zen.2458044.*.HH.uvh5", + "zen.2458045.*.HH.uvh5", + ], + ) + return fl + + +@pytest.fixture(scope="module") +def datafiles_in_nightly_folders(tmp_path_factory) -> Path: + + topdir = tmp_path_factory.mktemp("nightly-data") + + # Also, put our input files into nightly folders + for night in ["2458043", "2458044", "2458045"]: + ndir = topdir / night + ndir.mkdir() + + for fl in Path(DATA_PATH).glob("zen.*.uvh5"): + if f"{night}." in fl.name: + shutil.copy(fl, ndir / fl.name) + + return topdir + + +@pytest.fixture(scope="module") +def lsttoml_notebook_datafiles( + tmp_path_factory, datafiles_in_nightly_folders: Path +) -> Path: + """Make a notebook lstbin config file.""" + fl = tmp_path_factory.mktemp("data") / "lstbin_notebook.toml" + make_lstbin_config_file( + fl, + datafiles={ + "datadir": str(datafiles_in_nightly_folders), + "nightdirs": [fl.name for fl in datafiles_in_nightly_folders.glob("*")], + "sum_or_diff": "HH", + "extension": "uvh5", + }, + notebook=True, + ) + return fl + + +# @pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") +# @pytest.mark.filterwarnings("ignore: A value for the") +# @pytest.mark.filterwarnings("ignore:dlst should not be set to (string) 'None'") +@pytest.mark.parametrize( + "config_file", + [ + "lsttoml_direct_datafiles", + "lsttoml_direct_datafiles_glob", + "lsttoml_notebook_datafiles", + ], +) +@pytest.mark.parametrize("give_mf_name", [True, False]) +def test_build_makeflow_from_config_lstbin_options( + config_file, + tmp_path_factory, + request, + give_mf_name, +): + """Test building a makeflow from a lstbin config file. + + In particular, this function calls the build_makeflow_from_config directly, + which dispatches to the build_lstbin_makeflow_from_config function. + """ + config_file = request.getfixturevalue(config_file) + + # test lstbin version with options + obsids = None + work_dir = tmp_path_factory.mktemp("test_output") + outfile = work_dir / config_file.name.replace(".toml", ".mf") + + mt.build_makeflow_from_config( + obsids, + config_file, + mf_name=outfile.name if give_mf_name else None, + work_dir=work_dir, + outdir=work_dir, # pass directly so that we can check the output + ) + + # make sure the output files we expected appeared + assert outfile.exists() + + +# @hc_skip +# @pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") +# @pytest.mark.filterwarnings("ignore:dlst should not be set to (string) 'None'") +# @pytest.mark.parametrize( +# "provide_outfile,v2", [(False, False), (True, False), (False, True)] +# ) +# def test_build_lstbin_makeflow_from_config( +# config_options, tmp_path_factory, provide_outfile: bool, v2: bool +# ): +# # define load in config +# config_file = config_options["config_file_lstbin"] + +# if v2: +# config_file = config_file.replace("lstbin", "lstbin_v2") + +# # setup vars +# work_dir = tmp_path_factory.mktemp("test_output") +# if provide_outfile: +# mf_output = "output.mf" +# else: +# mf_output = os.path.splitext(os.path.basename(config_file))[0] + ".mf" +# outfile = work_dir / mf_output + +# kwargs = {"work_dir": str(work_dir), "parent_dir": DATA_PATH, "outdir": work_dir} +# if provide_outfile: +# kwargs["mf_name"] = outfile + +# mt.build_lstbin_makeflow_from_config(config_file, **kwargs) + +# # make sure the output files we expected appeared +# assert outfile.exists() + +# # check that the wrapper scripts have the right lines in them +# wrapper_scripts = [ +# f for f in sorted(os.listdir(work_dir)) if f.startswith("wrapper_") +# ] +# with open(work_dir / wrapper_scripts[0]) as infile: +# lines = infile.readlines() +# assert lines[0].strip() == "#!/bin/bash" +# assert lines[1].strip() == "source ~/.bashrc" +# assert lines[2].strip() == "conda activate hera" +# assert lines[3].strip() == "date" + + +# @hc_skip +# @pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") +# @pytest.mark.filterwarnings("ignore: A value for the") +# @pytest.mark.parametrize("provide_outfile", [True, False]) +# def test_build_lstbin_makeflow_from_config_options( +# config_options, tmp_path_factory, provide_outfile +# ): +# # define load in config +# config_file = config_options["config_file_lstbin_options"] + +# # setup vars +# work_dir = tmp_path_factory.mktemp("test_output") +# if provide_outfile: +# mf_output = "output.mf" +# else: +# mf_output = os.path.splitext(os.path.basename(config_file))[0] + ".mf" + +# outfile = work_dir / mf_output + +# mt.build_lstbin_makeflow_from_config( +# config_file, +# mf_name=outfile, +# work_dir=str(work_dir), +# parent_dir=DATA_PATH, +# outdir=work_dir, +# ) + +# # make sure the output files we expected appeared +# assert outfile.exists() diff --git a/hera_opm/tests/test_mf_tools.py b/hera_opm/tests/test_mf_tools.py index 49e8c1e..9568ffd 100644 --- a/hera_opm/tests/test_mf_tools.py +++ b/hera_opm/tests/test_mf_tools.py @@ -7,23 +7,11 @@ import shutil import gzip import toml -from pathlib import Path from . import BAD_CONFIG_PATH from ..data import DATA_PATH from .. import mf_tools as mt -# define a pytest marker for skipping lstbin tests -try: - import hera_cal # noqa - - hc_installed = True -except ImportError: - hc_installed = False -hc_skip = pytest.mark.skipif( - not hc_installed, reason="hera_cal must be installed for this test" -) - @pytest.fixture(scope="module") def config_options(): @@ -44,12 +32,6 @@ def config_options(): config_dict["config_file_nopol"] = os.path.join( DATA_PATH, "sample_config", "nrao_rtp_nopol.toml" ) - config_dict["config_file_lstbin"] = os.path.join( - DATA_PATH, "sample_config", "lstbin.toml" - ) - config_dict["config_file_lstbin_options"] = os.path.join( - DATA_PATH, "sample_config", "lstbin_options.toml" - ) config_dict["config_file_setup_teardown"] = os.path.join( DATA_PATH, "sample_config", "nrao_rtp_setup_teardown.toml" ) @@ -800,164 +782,6 @@ def test_setup_teardown_errors(config_options): return -@hc_skip -@pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") -@pytest.mark.filterwarnings("ignore:dlst should not be set to (string) 'None'") -@pytest.mark.parametrize( - "provide_outfile,v2", [(False, False), (True, False), (False, True)] -) -def test_build_lstbin_makeflow_from_config( - config_options, tmp_path_factory, provide_outfile: bool, v2: bool -): - # define load in config - config_file = config_options["config_file_lstbin"] - - if v2: - config_file = config_file.replace("lstbin", "lstbin_v2") - - # setup vars - work_dir = tmp_path_factory.mktemp("test_output") - if provide_outfile: - mf_output = "output.mf" - else: - mf_output = os.path.splitext(os.path.basename(config_file))[0] + ".mf" - outfile = work_dir / mf_output - - kwargs = {"work_dir": str(work_dir), "parent_dir": DATA_PATH, "outdir": work_dir} - if provide_outfile: - kwargs["mf_name"] = outfile - - mt.build_lstbin_makeflow_from_config(config_file, **kwargs) - - # make sure the output files we expected appeared - assert outfile.exists() - - # check that the wrapper scripts have the right lines in them - wrapper_scripts = [ - f for f in sorted(os.listdir(work_dir)) if f.startswith("wrapper_") - ] - with open(work_dir / wrapper_scripts[0]) as infile: - lines = infile.readlines() - assert lines[0].strip() == "#!/bin/bash" - assert lines[1].strip() == "source ~/.bashrc" - assert lines[2].strip() == "conda activate hera" - assert lines[3].strip() == "date" - - -@hc_skip -@pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") -@pytest.mark.filterwarnings("ignore: A value for the") -@pytest.mark.parametrize("provide_outfile", [True, False]) -def test_build_lstbin_makeflow_from_config_options( - config_options, tmp_path_factory, provide_outfile -): - # define load in config - config_file = config_options["config_file_lstbin_options"] - - # setup vars - work_dir = tmp_path_factory.mktemp("test_output") - if provide_outfile: - mf_output = "output.mf" - else: - mf_output = os.path.splitext(os.path.basename(config_file))[0] + ".mf" - - outfile = work_dir / mf_output - - mt.build_lstbin_makeflow_from_config( - config_file, - mf_name=outfile, - work_dir=str(work_dir), - parent_dir=DATA_PATH, - outdir=work_dir, - ) - - # make sure the output files we expected appeared - assert outfile.exists() - - -@hc_skip -@pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") -@pytest.mark.filterwarnings("ignore: A value for the") -def test_build_lstbin_makeflow_direct_options(config_options, tmp_path_factory): - # Get the config template - config_file = config_options["config_file_lstbin_options"] - # setup vars - work_dir = tmp_path_factory.mktemp("test_output") - mf_output = "output.mf" - outfile = work_dir / mf_output - - # Make new config with dynamic variables in it... - config = work_dir / "inputconf.toml" - with open(config_file, "r") as fl: - _cfg = toml.load(fl) - - _cfg["LSTBIN_OPTS"]["outdir"] = str(work_dir) - _cfg["LSTBIN_OPTS"]["parent_dir"] = DATA_PATH - print("DP: ", DATA_PATH) - with open(config, "w") as fl: - toml.dump(_cfg, fl) - - mt.build_lstbin_makeflow_from_config( - config, - mf_name=outfile, - work_dir=str(work_dir), - ) - - # make sure the output files we expected appeared - assert outfile.exists() - - -@hc_skip -@pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") -@pytest.mark.filterwarnings("ignore: A value for the") -@pytest.mark.parametrize("label", ["", "label."]) -def test_build_lstbin_makeflow_simple(config_options, tmp_path_factory, label): - # Get the config template - config_file = config_options["config_file_lstbin"].replace( - "lstbin.", "lstbin_simple." - ) - - # setup vars - work_dir = tmp_path_factory.mktemp("test_output") - mf_output = "output.mf" - outfile = work_dir / mf_output - - # Make new config with dynamic variables in it... - config = work_dir / "inputconf.toml" - with open(config_file, "r") as fl: - _cfg = toml.load(fl) - - _cfg["LSTBIN_OPTS"]["outdir"] = str(work_dir) - _cfg["LSTBIN_OPTS"]["parent_dir"] = DATA_PATH - _cfg["LSTBIN_OPTS"]["datadir"] = str(work_dir) - _cfg["LSTBIN_OPTS"]["label"] = str(label)[:-1] - - with open(config, "w") as fl: - toml.dump(_cfg, fl) - - # Also, put our input files into nightly folders - (work_dir / "2458043").mkdir() - (work_dir / "2458044").mkdir() - (work_dir / "2458045").mkdir() - for fl in Path(DATA_PATH).glob("zen.*.uvh5"): - newfl = fl.with_suffix(f".{label}uvh5").name - if "2458043." in fl.name: - shutil.copy(fl, work_dir / "2458043" / newfl) - elif "2458044." in fl.name: - shutil.copy(fl, work_dir / "2458044" / newfl) - elif "2458045." in fl.name: - shutil.copy(fl, work_dir / "2458045" / newfl) - - mt.build_lstbin_makeflow_from_config( - config, - mf_name=outfile, - work_dir=str(work_dir), - ) - - # make sure the output files we expected appeared - assert outfile.exists() - - def test_build_makeflow_from_config(config_options): # define args obsids = config_options["obsids"][:1] @@ -985,31 +809,6 @@ def test_build_makeflow_from_config(config_options): return -@hc_skip -@pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") -@pytest.mark.filterwarnings("ignore: A value for the") -@pytest.mark.filterwarnings("ignore:dlst should not be set to (string) 'None'") -def test_build_makeflow_from_config_lstbin_options(config_options, tmp_path_factory): - # test lstbin version with options - obsids = config_options["obsids"][:1] - config_file = config_options["config_file_lstbin_options"] - work_dir = tmp_path_factory.mktemp("test_output") - mf_output = os.path.splitext(os.path.basename(config_file))[0] + ".mf" - outfile = work_dir / mf_output - - mt.build_makeflow_from_config( - obsids, - config_file, - mf_name=str(outfile), - work_dir=str(work_dir), - parent_dir=DATA_PATH, - outdir=work_dir, - ) - - # make sure the output files we expected appeared - assert outfile.exists() - - def test_clean_wrapper_scripts(): # define args work_dir = os.path.join(DATA_PATH, "test_output") diff --git a/setup.cfg b/setup.cfg index d7670da..8709ede 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,7 +5,7 @@ long_description_content_type = text/markdown [options.extras_require] test = - hera-calibration>=3.2.3 + hera-calibration>=3.3.0 pytest pytest-cov package = From d4db7dba3e3044104debf89924668a21d9583fa9 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 4 Apr 2024 12:51:57 +0200 Subject: [PATCH 29/35] test: fix annotations --- hera_opm/tests/test_lstbin_makeflow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hera_opm/tests/test_lstbin_makeflow.py b/hera_opm/tests/test_lstbin_makeflow.py index 85e7335..34a5703 100644 --- a/hera_opm/tests/test_lstbin_makeflow.py +++ b/hera_opm/tests/test_lstbin_makeflow.py @@ -1,3 +1,4 @@ +from __future__ import annotations from pathlib import Path import toml from ..data import DATA_PATH From 471f4eff798a63bed1f90bfb66952b168ff75aba Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Wed, 15 May 2024 15:51:34 +0200 Subject: [PATCH 30/35] fix: remove print --- hera_opm/mf_tools.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index f37931f..5785977 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1342,7 +1342,6 @@ def make_lstbin_config_file(config, outdir: str) -> int: from hera_cal.lst_stack.config import LSTBinConfigurator - print(lstconfig) lstconfig = LSTBinConfigurator.from_toml(toml.dumps(lstconfig)) print(f"Found {len(lstconfig.data_files)} nights of data.") print("Each night has the following number of files:") From ec3b7ee7be6c9cc5edc12f27e69ad515028710cf Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Tue, 25 Jun 2024 20:25:02 +0200 Subject: [PATCH 31/35] small comment fix --- hera_opm/mf_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 5785977..1d07942 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1402,7 +1402,7 @@ def build_lstbin_makeflow_from_config( shutil.copy2(config_file, outdir / "lstbin-config.toml") # Also write a YAML version of just the parameters, to be used to run - # the notebook + # the notebook. cfg_opts = toml.load(config_file)["LSTAVG_OPTS"] lstavg_config = outdir / "lstavg-config.toml" with open(lstavg_config, "w") as fl: From d555d7db33e5e835737fe49645953609551f9143 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 4 Jul 2024 07:54:46 +0200 Subject: [PATCH 32/35] fix: correct passing of nights to find_datafiles --- hera_opm/tests/test_lstbin_makeflow.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hera_opm/tests/test_lstbin_makeflow.py b/hera_opm/tests/test_lstbin_makeflow.py index 34a5703..e888f34 100644 --- a/hera_opm/tests/test_lstbin_makeflow.py +++ b/hera_opm/tests/test_lstbin_makeflow.py @@ -146,9 +146,8 @@ def lsttoml_notebook_datafiles( fl, datafiles={ "datadir": str(datafiles_in_nightly_folders), - "nightdirs": [fl.name for fl in datafiles_in_nightly_folders.glob("*")], - "sum_or_diff": "HH", - "extension": "uvh5", + "nights": [fl.name for fl in datafiles_in_nightly_folders.glob("*")], + "fileglob": "zen.{night}.*.HH.uvh5", }, notebook=True, ) From e5d946833fae4cc1eb3c56b2a1bc2789cfbbd020 Mon Sep 17 00:00:00 2001 From: Steven Murray Date: Thu, 4 Jul 2024 08:03:57 +0200 Subject: [PATCH 33/35] test: fix passing of fileglob --- hera_opm/tests/test_lstbin_makeflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hera_opm/tests/test_lstbin_makeflow.py b/hera_opm/tests/test_lstbin_makeflow.py index e888f34..890e1ca 100644 --- a/hera_opm/tests/test_lstbin_makeflow.py +++ b/hera_opm/tests/test_lstbin_makeflow.py @@ -147,7 +147,7 @@ def lsttoml_notebook_datafiles( datafiles={ "datadir": str(datafiles_in_nightly_folders), "nights": [fl.name for fl in datafiles_in_nightly_folders.glob("*")], - "fileglob": "zen.{night}.*.HH.uvh5", + "fileglob": "{night}/zen.{night}.*.HH.uvh5", }, notebook=True, ) From e152a319a84096523e6705a120a636d43f0d7a2f Mon Sep 17 00:00:00 2001 From: Josh Dillon Date: Fri, 12 Jul 2024 10:29:15 -0700 Subject: [PATCH 34/35] remove commented out code --- hera_opm/tests/test_lstbin_makeflow.py | 78 -------------------------- 1 file changed, 78 deletions(-) diff --git a/hera_opm/tests/test_lstbin_makeflow.py b/hera_opm/tests/test_lstbin_makeflow.py index 890e1ca..0573de7 100644 --- a/hera_opm/tests/test_lstbin_makeflow.py +++ b/hera_opm/tests/test_lstbin_makeflow.py @@ -154,9 +154,6 @@ def lsttoml_notebook_datafiles( return fl -# @pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") -# @pytest.mark.filterwarnings("ignore: A value for the") -# @pytest.mark.filterwarnings("ignore:dlst should not be set to (string) 'None'") @pytest.mark.parametrize( "config_file", [ @@ -194,78 +191,3 @@ def test_build_makeflow_from_config_lstbin_options( # make sure the output files we expected appeared assert outfile.exists() - - -# @hc_skip -# @pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") -# @pytest.mark.filterwarnings("ignore:dlst should not be set to (string) 'None'") -# @pytest.mark.parametrize( -# "provide_outfile,v2", [(False, False), (True, False), (False, True)] -# ) -# def test_build_lstbin_makeflow_from_config( -# config_options, tmp_path_factory, provide_outfile: bool, v2: bool -# ): -# # define load in config -# config_file = config_options["config_file_lstbin"] - -# if v2: -# config_file = config_file.replace("lstbin", "lstbin_v2") - -# # setup vars -# work_dir = tmp_path_factory.mktemp("test_output") -# if provide_outfile: -# mf_output = "output.mf" -# else: -# mf_output = os.path.splitext(os.path.basename(config_file))[0] + ".mf" -# outfile = work_dir / mf_output - -# kwargs = {"work_dir": str(work_dir), "parent_dir": DATA_PATH, "outdir": work_dir} -# if provide_outfile: -# kwargs["mf_name"] = outfile - -# mt.build_lstbin_makeflow_from_config(config_file, **kwargs) - -# # make sure the output files we expected appeared -# assert outfile.exists() - -# # check that the wrapper scripts have the right lines in them -# wrapper_scripts = [ -# f for f in sorted(os.listdir(work_dir)) if f.startswith("wrapper_") -# ] -# with open(work_dir / wrapper_scripts[0]) as infile: -# lines = infile.readlines() -# assert lines[0].strip() == "#!/bin/bash" -# assert lines[1].strip() == "source ~/.bashrc" -# assert lines[2].strip() == "conda activate hera" -# assert lines[3].strip() == "date" - - -# @hc_skip -# @pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed") -# @pytest.mark.filterwarnings("ignore: A value for the") -# @pytest.mark.parametrize("provide_outfile", [True, False]) -# def test_build_lstbin_makeflow_from_config_options( -# config_options, tmp_path_factory, provide_outfile -# ): -# # define load in config -# config_file = config_options["config_file_lstbin_options"] - -# # setup vars -# work_dir = tmp_path_factory.mktemp("test_output") -# if provide_outfile: -# mf_output = "output.mf" -# else: -# mf_output = os.path.splitext(os.path.basename(config_file))[0] + ".mf" - -# outfile = work_dir / mf_output - -# mt.build_lstbin_makeflow_from_config( -# config_file, -# mf_name=outfile, -# work_dir=str(work_dir), -# parent_dir=DATA_PATH, -# outdir=work_dir, -# ) - -# # make sure the output files we expected appeared -# assert outfile.exists() From 59d58a19024f65abfba9760cfabed8b768d9949e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:04:17 +0000 Subject: [PATCH 35/35] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- hera_opm/tests/test_lstbin_makeflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hera_opm/tests/test_lstbin_makeflow.py b/hera_opm/tests/test_lstbin_makeflow.py index 214186e..0573de7 100644 --- a/hera_opm/tests/test_lstbin_makeflow.py +++ b/hera_opm/tests/test_lstbin_makeflow.py @@ -191,4 +191,3 @@ def test_build_makeflow_from_config_lstbin_options( # make sure the output files we expected appeared assert outfile.exists() -