From f263da603e16c3a8685052649f509afeeee84451 Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Tue, 7 Jul 2020 11:25:27 -0700 Subject: [PATCH 01/22] properly pass in prereq to make_time_neighbor_list --- hera_opm/mf_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index c6368f72..9f75f60f 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1013,7 +1013,7 @@ def build_analysis_makeflow_from_config( # add neighbors neighbors = make_time_neighbor_list( filename, - action, + prereq, obsids, n_time_neighbors=n_time_neighbors, time_centered=time_centered, From b06536c56a3d098d332095070c7c3068bf12a674 Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Tue, 7 Jul 2020 11:49:05 -0700 Subject: [PATCH 02/22] Update existing tests for planned api change --- .../sample_config/nrao_rtp_stride_length.toml | 4 +- .../nrao_rtp_time_neighbors.toml | 4 +- .../nrao_rtp_time_neighbors_all.toml | 6 +-- hera_opm/tests/test_mf_tools.py | 38 +++++++++---------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/hera_opm/data/sample_config/nrao_rtp_stride_length.toml b/hera_opm/data/sample_config/nrao_rtp_stride_length.toml index 0e1e1f69..8bcb41c3 100644 --- a/hera_opm/data/sample_config/nrao_rtp_stride_length.toml +++ b/hera_opm/data/sample_config/nrao_rtp_stride_length.toml @@ -30,12 +30,12 @@ args = ["{basename}", "${Options:ex_ants}"] [OMNICAL_METRICS] prereqs = "OMNICAL" -n_time_neighbors = 1 +n_curr_time_neighbors = 1 args = ["{basename}", "{prev_basename}", "{next_basename}"] [OMNI_APPLY] prereqs = "OMNICAL_METRICS" -n_time_neighbors = 1 +n_curr_time_neighbors = 1 stride_length = 2 # args = ["{basename}", "{obsid_list}"] args = "{basename}" diff --git a/hera_opm/data/sample_config/nrao_rtp_time_neighbors.toml b/hera_opm/data/sample_config/nrao_rtp_time_neighbors.toml index 2fda6b7a..68ae63a4 100644 --- a/hera_opm/data/sample_config/nrao_rtp_time_neighbors.toml +++ b/hera_opm/data/sample_config/nrao_rtp_time_neighbors.toml @@ -29,12 +29,12 @@ args = ["{basename}", "${Options:ex_ants}"] [OMNICAL_METRICS] prereqs = "OMNICAL" -n_time_neighbors = 1 +n_curr_time_neighbors = 1 args = ["{basename}", "{prev_basename}", "{next_basename}"] [OMNI_APPLY] prereqs = "OMNICAL_METRICS" -n_time_neighbors = 1 +n_curr_time_neighbors = 1 stride_length = 2 args = "{basename}" diff --git a/hera_opm/data/sample_config/nrao_rtp_time_neighbors_all.toml b/hera_opm/data/sample_config/nrao_rtp_time_neighbors_all.toml index 7c22a834..2423696f 100644 --- a/hera_opm/data/sample_config/nrao_rtp_time_neighbors_all.toml +++ b/hera_opm/data/sample_config/nrao_rtp_time_neighbors_all.toml @@ -15,14 +15,14 @@ actions = ["XRFI"] [XRFI] args = "{basename}" -n_time_neighbors = 'all' +n_curr_time_neighbors = 'all' [XRFI_CENTERED] args = "{basename}" -n_time_neighbors = 'all' +n_curr_time_neighbors = 'all' time_centered = true [XRFI_NOT_CENTERED] args = "{basename}" -n_time_neighbors = 'all' +n_curr_time_neighbors = 'all' time_centered = false diff --git a/hera_opm/tests/test_mf_tools.py b/hera_opm/tests/test_mf_tools.py index 34f7838e..20921206 100644 --- a/hera_opm/tests/test_mf_tools.py +++ b/hera_opm/tests/test_mf_tools.py @@ -149,17 +149,17 @@ def test_get_config_entry_total_length(config_options): # Default is to time center assert ( - mt.get_config_entry(config, "XRFI", "n_time_neighbors", total_length=15) == "7" + mt.get_config_entry(config, "XRFI", "n_curr_time_neighbors", total_length=15) == "7" ) assert ( mt.get_config_entry( - config, 
"XRFI_CENTERED", "n_time_neighbors", total_length=21 + config, "XRFI_CENTERED", "n_curr_time_neighbors", total_length=21 ) == "10" ) assert ( mt.get_config_entry( - config, "XRFI_NOT_CENTERED", "n_time_neighbors", total_length=7 + config, "XRFI_NOT_CENTERED", "n_curr_time_neighbors", total_length=7 ) == "6" ) @@ -337,7 +337,7 @@ def test_determine_stride_partitioning(config_options): primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( input_obsids, stride_length=1, - n_time_neighbors=2, + n_curr_time_neighbors=2, time_centered=True, collect_stragglers=False, ) @@ -381,7 +381,7 @@ def test_determine_stride_partitioning_defaults(config_options): # run again with n_time_neighbors = 2 primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( - input_obsids, n_time_neighbors=2, time_centered=True, collect_stragglers=False, + input_obsids, n_curr_time_neighbors=2, time_centered=True, collect_stragglers=False, ) # the results should be the same as in test_determine_stride_partitioning assert primary_obsids == list(input_obsids[2:-2]) @@ -407,7 +407,7 @@ def test_determine_stride_partitioning_collect_stragglers(config_options): primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( input_obsids, stride_length=4, - n_time_neighbors=3, + n_curr_time_neighbors=3, time_centered=False, collect_stragglers=True, ) @@ -437,17 +437,17 @@ def test_determine_stride_partitioning_errors(config_options): ValueError, match="stride_length must be able to be interpreted as an int" ): mt._determine_stride_partitioning( - input_obsids, stride_length="foo", n_time_neighbors=1 + input_obsids, stride_length="foo", n_curr_time_neighbors=1 ) with pytest.raises( - ValueError, match="n_time_neighbors must be able to be interpreted as an int" + ValueError, match="n_curr_time_neighbors must be able to be interpreted as an int" ): - mt._determine_stride_partitioning(input_obsids, n_time_neighbors="foo") + mt._determine_stride_partitioning(input_obsids, n_curr_time_neighbors="foo") with pytest.raises(ValueError, match="time_centered must be a boolean variable"): mt._determine_stride_partitioning( - input_obsids, stride_length=1, n_time_neighbors=1, time_centered="False", + input_obsids, stride_length=1, n_curr_time_neighbors=1, time_centered="False", ) with pytest.raises( @@ -456,7 +456,7 @@ def test_determine_stride_partitioning_errors(config_options): mt._determine_stride_partitioning( input_obsids, stride_length=1, - n_time_neighbors=1, + n_curr_time_neighbors=1, time_centered=False, collect_stragglers="True", ) @@ -470,7 +470,7 @@ def test_determine_stride_partitioning_noncontiguous_stragglers(config_options): primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( input_obsids, stride_length=10, - n_time_neighbors=1, + n_curr_time_neighbors=1, time_centered=False, collect_stragglers=True, ) @@ -1165,7 +1165,7 @@ def test_prep_args_obsid_list(config_options): args, obsid, obsids=obsids_list, - n_time_neighbors="1", + n_curr_time_neighbors="1", time_centered=None, collect_stragglers=False, ) @@ -1185,7 +1185,7 @@ def test_prep_args_obsid_list_centered(config_options): args, obsid, obsids=obsids_list, - n_time_neighbors="1", + n_curr_time_neighbors="1", time_centered=True, collect_stragglers=False, ) @@ -1205,7 +1205,7 @@ def test_prep_args_obsid_list_not_centered(config_options): args, obsid, obsids=obsids_list, - n_time_neighbors="1", + n_curr_time_neighbors="1", time_centered=False, collect_stragglers=False, ) @@ -1225,7 +1225,7 @@ 
def test_prep_args_obsid_list_with_stragglers(config_options): args, obsid, obsids=obsids_list, - n_time_neighbors="1", + n_curr_time_neighbors="1", stride_length="2", time_centered=False, collect_stragglers=True, @@ -1247,18 +1247,18 @@ def test_prep_args_obsid_list_error(config_options): args, obsid, obsids=obsids_list, - n_time_neighbors="foo", + n_curr_time_neighbors="foo", time_centered=True, collect_stragglers=False, ) - assert str(cm.value).startswith("n_time_neighbors must be able to be interpreted") + assert str(cm.value).startswith("n_curr_time_neighbors must be able to be interpreted") with pytest.raises(ValueError) as cm: args = mt.prep_args( args, obsid, obsids=obsids_list, - n_time_neighbors="1", + n_curr_time_neighbors="1", stride_length="foo", time_centered=True, collect_stragglers=False, From 09989bde0ad97423313540881bddb918409f41c5 Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Tue, 7 Jul 2020 12:29:33 -0700 Subject: [PATCH 03/22] distinguish prev and curr time_neighbors --- hera_opm/mf_tools.py | 74 ++++++++++--------- .../bad_configs/bad_example_obsid_list.toml | 2 +- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 9f75f60f..e963cb41 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -94,7 +94,7 @@ def get_config_entry( of the config file, an error is raised. Default is True. total_length : int, optional If this parameter belongs to the special group of - [stride_length, n_time_neighbors], + [stride_length, n_curr_time_neighbors], the entry will be further parsed to interpret 'all', and be replaced with (total_length - 1) // 2 if time_centered is True (default) or with (total_length - 1) if time_centered is False. @@ -120,7 +120,7 @@ def get_config_entry( entries[i] = _interpolate_config(config, entry) else: entries = _interpolate_config(config, entries) - if item in ["stride_length", "n_time_neighbors"]: + if item in ["stride_length", "n_curr_time_neighbors"]: time_centered = get_config_entry( config, header, "time_centered", required=False ) @@ -381,7 +381,7 @@ def process_batch_options( def _determine_stride_partitioning( obsids, stride_length=None, - n_time_neighbors=None, + n_curr_time_neighbors=None, time_centered=None, collect_stragglers=None, ): @@ -392,16 +392,16 @@ def _determine_stride_partitioning( The list of obsids. stride_length : int, optional Length of the stride. Default is 1. - n_time_neighbors : int, optional + n_curr_time_neighbors : int, optional Number of time neighbors. Optional, default is 0. time_centered : bool, optional - Whether to center the obsid and select n_time_neighbors on either side, - returning a total of 2 * n_time_neighbors + 1 obsids (True, default), or + Whether to center the obsid and select n_curr_time_neighbors on either side, + returning a total of 2 * n_curr_time_neighbors + 1 obsids (True, default), or a group starting with the selected obsid and a total of length - n_time_neighbors + 1 (False). + n_curr_time_neighbors + 1 (False). collect_stragglers : bool, optional When the list of files to work on is not divided evenly by the - combination of stride_length and n_time_neighbors, this option specifies + combination of stride_length and n_curr_time_neighbors, this option specifies whether to include the straggler files into the last group (True) or treat them as their own small group (False, default). @@ -419,13 +419,13 @@ def _determine_stride_partitioning( workflow. 
An obsid may have itself as a primary obsid (e.g., if stride_length == 1, then each obsid will have itself, as well as its time neighbors, as primary obsids). If `stride_length` and - `n_time_neighbors` are such that there are obsids that do not belong to + `n_curr_time_neighbors` are such that there are obsids that do not belong to any group, then the value is an empty list. """ if stride_length is None: stride_length = 1 - if n_time_neighbors is None: - n_time_neighbors = 0 + if n_curr_time_neighbors is None: + n_curr_time_neighbors = 0 if time_centered is None: time_centered = True if collect_stragglers is None: @@ -433,9 +433,9 @@ def _determine_stride_partitioning( obsids = sort_obsids(obsids) try: - n_time_neighbors = int(n_time_neighbors) + n_curr_time_neighbors = int(n_curr_time_neighbors) except ValueError: - raise ValueError("n_time_neighbors must be able to be interpreted as an int.") + raise ValueError("n_curr_time_neighbors must be able to be interpreted as an int.") try: stride_length = int(stride_length) except ValueError: @@ -454,16 +454,16 @@ def _determine_stride_partitioning( primary_obsids = [] per_obsid_primary_obsids = [[] for i in range(len(obsids))] - for idx in range(time_centered * n_time_neighbors, len(obsids), stride_length): + for idx in range(time_centered * n_curr_time_neighbors, len(obsids), stride_length): # Compute the number of remaining obsids to process. # We account for the location of the next stride to determine if we # should grab straggling obsids. n_following = len(obsids) - (idx + stride_length) if time_centered: - i1 = max(idx - n_time_neighbors, 0) + i1 = max(idx - n_curr_time_neighbors, 0) else: i1 = idx - i2 = idx + n_time_neighbors + 1 + i2 = idx + n_curr_time_neighbors + 1 # Check to see if i2 would be past the end of the array. If # `collect_stragglers` is True, then we would have broken out of the # loop on the iteration previous to the current one. Otherwise we drop @@ -471,12 +471,12 @@ def _determine_stride_partitioning( # make a full set. if i2 > len(obsids): break - if n_following < (n_time_neighbors + 1) and collect_stragglers: + if n_following < (n_curr_time_neighbors + 1) and collect_stragglers: # Figure out if any observations that would normally have been skipped # will be lumped in by getting all remaining observations. # "stride_length - 1" is the actual number of observations between # current idx and next one given stride_length. - gap = (stride_length - 1) - n_time_neighbors * (1 + time_centered) + gap = (stride_length - 1) - n_curr_time_neighbors * (1 + time_centered) if gap > 0: warnings.warn( "Collecting stragglers is incompatible with gaps between " @@ -503,7 +503,7 @@ def prep_args( args, obsid, obsids=None, - n_time_neighbors="0", + n_curr_time_neighbors="0", stride_length="1", time_centered=None, collect_stragglers=None, @@ -520,16 +520,16 @@ def prep_args( Filename/obsid to be substituted. obsids : list of str, optional Full list of obsids. Required when time-adjacent neighbors are desired. - n_time_neighbors : str + n_curr_time_neighbors : str Number of neighboring time files to append to list. If set to the string "all", then all neighbors from that JD are added. stride_length : str Number of files to include in a stride. This interacts with - `n_time_neighbors` to define how arguments are generate. + `n_curr_time_neighbors` to define how arguments are generate. time_centered : bool, optional Whether the provided obsid should be in the center of the neighbors. 
- If True (default), returns n_time_neighbors on either side of obsid. - If False, returns original obsid _and_ n_time_neighbors following. + If True (default), returns n_curr_time_neighbors on either side of obsid. + If False, returns original obsid _and_ n_curr_time_neighbors following. collect_stragglers : bool, optional Whether to lump files close to the end of the list ("stragglers") into the previous group, or belong to their own smaller group. @@ -596,7 +596,7 @@ def prep_args( _, per_obsid_primary_obsids = _determine_stride_partitioning( obsids, stride_length=stride_length, - n_time_neighbors=n_time_neighbors, + n_curr_time_neighbors=n_curr_time_neighbors, time_centered=time_centered, collect_stragglers=collect_stragglers, ) @@ -748,19 +748,19 @@ def build_analysis_makeflow_from_config( if idx != len(workflow) - 1: raise ValueError("TEARDOWN must be last entry of workflow") - # Check for actions that use n_time_neighbors, make sure obsid_list is last arg + # Check for actions that use n_curr_time_neighbors, make sure obsid_list is last arg for action in workflow: - n_time_neighbors = get_config_entry( - config, action, "n_time_neighbors", required=False, total_length=len(obsids) + n_curr_time_neighbors = get_config_entry( + config, action, "n_curr_time_neighbors", required=False, total_length=len(obsids) ) - if n_time_neighbors is not None: + if n_curr_time_neighbors is not None: this_args = get_config_entry(config, action, "args", required=True) if "{obsid_list}" in this_args: bn_idx = this_args.index("{obsid_list}") if bn_idx != len(this_args) - 1: raise ValueError( "{obsid_list} must be the last argument for action" - f" {action} because n_time_neighbors is specified." + f" {action} because n_curr_time_neighbors is specified." ) path_to_do_scripts = get_config_entry(config, "Options", "path_to_do_scripts") @@ -912,10 +912,16 @@ def build_analysis_makeflow_from_config( required=False, total_length=len(obsids), ) - n_time_neighbors = get_config_entry( + n_prev_time_neighbors = get_config_entry( + config, + action, + "n_prev_time_neighbors", + required=False, + ) + n_curr_time_neighbors = get_config_entry( config, action, - "n_time_neighbors", + "n_curr_time_neighbors", required=False, total_length=len(obsids), ) @@ -935,7 +941,7 @@ def build_analysis_makeflow_from_config( ) = _determine_stride_partitioning( sorted_obsids, stride_length=stride_length, - n_time_neighbors=n_time_neighbors, + n_curr_time_neighbors=n_curr_time_neighbors, time_centered=time_centered, collect_stragglers=collect_stragglers, ) @@ -1015,7 +1021,7 @@ def build_analysis_makeflow_from_config( filename, prereq, obsids, - n_time_neighbors=n_time_neighbors, + n_time_neighbors=n_prev_time_neighbors, time_centered=time_centered, stride_length=stride_length, collect_stragglers=collect_stragglers, @@ -1040,7 +1046,7 @@ def build_analysis_makeflow_from_config( args, filename, obsids=obsids, - n_time_neighbors=n_time_neighbors, + n_curr_time_neighbors=n_curr_time_neighbors, stride_length=stride_length, time_centered=time_centered, collect_stragglers=collect_stragglers, diff --git a/hera_opm/tests/bad_configs/bad_example_obsid_list.toml b/hera_opm/tests/bad_configs/bad_example_obsid_list.toml index aa0604b8..48ce1529 100644 --- a/hera_opm/tests/bad_configs/bad_example_obsid_list.toml +++ b/hera_opm/tests/bad_configs/bad_example_obsid_list.toml @@ -20,7 +20,7 @@ actions = ["SETUP", "ANT_METRICS", "FIRSTCAL", "FIRSTCAL_METRICS", [ANT_METRICS] args = "{obsid_list} {basename}" stride_length = 4 -n_time_neighbors = 2 
+n_curr_time_neighbors = 2 [FIRSTCAL] args = ["{basename}", "${Options:ex_ants}"] From bb62777a9d75c84da9a8bad95f3f01f0e4f5dd24 Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Tue, 7 Jul 2020 12:30:14 -0700 Subject: [PATCH 04/22] update prechunk toml for new api --- .../h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index 8d7c75ba..df4b29cd 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -116,7 +116,7 @@ args = "{basename}" [MAKE_REDCAL_NOTEBOOK] prereqs = "REDCAL" -n_time_neighbors = "all" +n_prev_time_neighbors = "all" stride_length = 1 time_centered = false mem = 128000 @@ -140,15 +140,13 @@ prereqs = "XRFI" args = ["${XRFI_DAY_THRESHOLD_OPTS:nsig_f}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_f_adj}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t_adj}", "{obsid_list}"] -n_time_neighbors = "all" +n_curr_time_neighbors = "all" stride_length = "all" time_centered = false [CAL_SMOOTH] prereqs = "XRFI_DAY_THRESHOLD" -n_time_neighbors = "all" -stride_length = 1 -time_centered = false +n_prev_time_neighbors = "all" mem = 128000 args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_scale}", "${CAL_SMOOTH_OPTS:tol}", "${CAL_SMOOTH_OPTS:filter_mode}", "${CAL_SMOOTH_OPTS:window}", @@ -157,30 +155,22 @@ args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_s [UPDATE_OMNISOL] prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" -stride_length = 1 -time_centered = false +n_prev_time_neighbors = "all" args = ["{basename}"] [NOISE] prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" -stride_length = 1 -time_centered = false +n_prev_time_neighbors = "all" args = ["{basename}"] [IMAGING] prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" -stride_length = 1 -time_centered = false +n_prev_time_neighbors = "all" mem = 96000 args = ["{basename}", "${IMAGING_OPTS:casa}", "${IMAGING_OPTS:casa_imaging_scripts}", "${IMAGING_OPTS:calibration}"] [MAKE_NOTEBOOK] prereqs = "ANT_METRICS" -n_time_neighbors = "all" -stride_length = 1 -time_centered = false +n_prev_time_neighbors = "all" mem = 128000 args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] From 79a4ba047ab8c9e36b28fd7a078161831321e53e Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Tue, 7 Jul 2020 13:18:47 -0700 Subject: [PATCH 05/22] use absolute paths to obsids --- hera_opm/mf_tools.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index e963cb41..72c00c6a 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -720,6 +720,9 @@ def build_analysis_makeflow_from_config( adjacent to "{basename}", useful for specifying prereqs """ + # Make obsids abs paths + obsids = [os.path.abspath(obsid) for obsid in obsids] + # make a cache dictionary _cache_dict = {} From 22a2272e6a57d9b85d95891b01d2d52d587cc4fe Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Wed, 8 Jul 2020 11:00:06 -0700 Subject: [PATCH 06/22] properly get prereqs from curr_time_neighbors --- hera_opm/mf_tools.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 72c00c6a..e16c8b61 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -1020,7 +1020,7 @@ def build_analysis_makeflow_from_config( "workflow".format(prereq, action) ) # add 
neighbors - neighbors = make_time_neighbor_list( + prev_neighbors = make_time_neighbor_list( filename, prereq, obsids, @@ -1029,11 +1029,21 @@ def build_analysis_makeflow_from_config( stride_length=stride_length, collect_stragglers=collect_stragglers, ) + curr_neighbors = make_time_neighbor_list( + filename, + prereq, + obsids, + n_time_neighbors=n_curr_time_neighbors, + time_centered=time_centered, + stride_length=stride_length, + collect_stragglers=collect_stragglers, + ) + all_neighbors = prev_neighbors + curr_neighbors pr_outfiles = [] key = prereq + "_per_obsid_primary_obsids" per_obsid_primary_obsids = _cache_dict[key] for oi, obs in enumerate(obsids): - if os.path.basename(obs) in neighbors: + if os.path.basename(obs) in all_neighbors: for primary_obsid in per_obsid_primary_obsids[oi]: if primary_obsid not in pr_outfiles: pr_outfiles.append(primary_obsid) From c6f05332b7d1b24d7adcdb30d1990731be695e94 Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Wed, 8 Jul 2020 11:41:12 -0700 Subject: [PATCH 07/22] Use vanilla xrfi when data are prechunked --- .../h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 6 ++--- .../idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh | 27 +++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 pipelines/h3c/idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index df4b29cd..928f73fe 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -128,11 +128,11 @@ args = ["{basename}", "${ABSCAL_OPTS:model_files_glob}", "${ABSCAL_OPTS:nInt_to_ "${ABSCAL_OPTS:min_bl_cut}", "${ABSCAL_OPTS:max_bl_cut}", "${ABSCAL_OPTS:phs_max_iter}", "${ABSCAL_OPTS:phs_conv_crit}", "${ABSCAL_OPTS:edge_cut}"] -[XRFI] +[XRFI_PRECHUNK] mem = 96000 prereqs = "ABSCAL" -args = ["${XRFI_OPTS:kt_size}", "${XRFI_OPTS:kf_size}", "${XRFI_OPTS:sig_init}", - "${XRFI_OPTS:sig_adj}", "{obsid_list}"] +args = ["{basename}", "${XRFI_OPTS:kt_size}", "${XRFI_OPTS:kf_size}", "${XRFI_OPTS:sig_init}", + "${XRFI_OPTS:sig_adj}"] [XRFI_DAY_THRESHOLD] diff --git a/pipelines/h3c/idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh b/pipelines/h3c/idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh new file mode 100644 index 00000000..d3398cfc --- /dev/null +++ b/pipelines/h3c/idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh @@ -0,0 +1,27 @@ +#! /bin/bash +set -e + +# import common functions +src_dir="$(dirname "$0")" +source ${src_dir}/_common.sh +fn="${1}" # Filename +bn=$(basename ${1}) # Basename + +# We need to run xrfi on calibration outputs as preliminary flags before we +# delay filter and run xrfi on visibilities. + +# Parameters are set in the configuration file, here we define their positions, +# which must be consistent with the config. 
+# 1 - filename +### XRFI parameters - see hera_qm.utils for details +# 2 - kt_size +# 3 - kf_size +# 4 - sig_init +# 5 - sig_adj + +ocalfits_file=`echo ${fn%.*}.omni.calfits` +acalfits_file=`echo ${fn%.*}.abs.calfits` +model_file=`echo ${fn%.*}.omni_vis.uvh5` + +echo xrfi_run.py --ocalfits_file=${ocalfits_file} --acalfits_file=${acalfits_file} --model_file=${model_file} --data_file=${fn} --kt_size=${2} --kf_size=${3} --sig_init=${4} --sig_adj=${5} --clobber +xrfi_run.py --ocalfits_file=${ocalfits_file} --acalfits_file=${acalfits_file} --model_file=${model_file} --data_file=${fn} --kt_size=${2} --kf_size=${3} --sig_init=${4} --sig_adj=${5} --clobber From c4019492e08824f9786f5c69872a81d07cfaa255 Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Wed, 8 Jul 2020 12:20:56 -0700 Subject: [PATCH 08/22] xrfi_prechunk to workflow --- pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index 928f73fe..1da2292a 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -39,7 +39,7 @@ phs_max_iter = 100 phs_conv_crit = 1e-6 edge_cut = 100 -[XRFI_OPTS] +[XRFI_PRECHUNK_OPTS] kt_size = 8 kf_size = 8 sig_init = 5.0 @@ -78,7 +78,7 @@ actions = ["FIX_DATAFILE", "REDCAL", "FIRSTCAL_METRICS", "ABSCAL", - "XRFI", + "XRFI_PRECHUNK", "XRFI_DAY_THRESHOLD", "CAL_SMOOTH", "MAKE_REDCAL_NOTEBOOK", @@ -131,8 +131,8 @@ args = ["{basename}", "${ABSCAL_OPTS:model_files_glob}", "${ABSCAL_OPTS:nInt_to_ [XRFI_PRECHUNK] mem = 96000 prereqs = "ABSCAL" -args = ["{basename}", "${XRFI_OPTS:kt_size}", "${XRFI_OPTS:kf_size}", "${XRFI_OPTS:sig_init}", - "${XRFI_OPTS:sig_adj}"] +args = ["{basename}", "${XRFI_PRECHUNK_OPTS:kt_size}", "${XRFI_PRECHUNK_OPTS:kf_size}", + "${XRFI_PRECHUNK_OPTS:sig_init}", "${XRFI_PRECHUNK_OPTS:sig_adj}"] [XRFI_DAY_THRESHOLD] From d946c47c5f2b85a017fdbcc7659e96af0e94230a Mon Sep 17 00:00:00 2001 From: Josh Dillon Date: Wed, 8 Jul 2020 12:50:45 -0700 Subject: [PATCH 09/22] fix prereq --- pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index 1da2292a..cfdc4034 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -136,7 +136,7 @@ args = ["{basename}", "${XRFI_PRECHUNK_OPTS:kt_size}", "${XRFI_PRECHUNK_OPTS:kf_ [XRFI_DAY_THRESHOLD] -prereqs = "XRFI" +prereqs = "XRFI_PRECHUNK" args = ["${XRFI_DAY_THRESHOLD_OPTS:nsig_f}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_f_adj}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t_adj}", "{obsid_list}"] From 32e95e2bd5a49badaa19da5287844f2253aaa2f8 Mon Sep 17 00:00:00 2001 From: Josh Dillon Date: Wed, 8 Jul 2020 13:24:18 -0700 Subject: [PATCH 10/22] update permissions --- .../h3c/idr2/v1/task_scripts/do_DAYENU_FOREGROUND_SUBTRACTION.sh | 0 pipelines/h3c/idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 pipelines/h3c/idr2/v1/task_scripts/do_DAYENU_FOREGROUND_SUBTRACTION.sh mode change 100644 => 100755 pipelines/h3c/idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh diff --git a/pipelines/h3c/idr2/v1/task_scripts/do_DAYENU_FOREGROUND_SUBTRACTION.sh b/pipelines/h3c/idr2/v1/task_scripts/do_DAYENU_FOREGROUND_SUBTRACTION.sh old mode 100644 new mode 100755 diff 
--git a/pipelines/h3c/idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh b/pipelines/h3c/idr2/v1/task_scripts/do_XRFI_PRECHUNK.sh old mode 100644 new mode 100755 From 7544c32ca13eead834a14930207998a91ac36bcc Mon Sep 17 00:00:00 2001 From: Josh Dillon Date: Thu, 9 Jul 2020 10:37:06 -0700 Subject: [PATCH 11/22] up redcal mem --- pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index a182b732..95d98515 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -104,6 +104,7 @@ args = ["{basename}", "${ANT_METRICS_OPTS:crossCut}", [REDCAL] prereqs = "ANT_METRICS" +mem = 24000 args = ["{basename}", "${Options:ex_ants_path}", "${REDCAL_OPTS:ant_z_thresh}", "${REDCAL_OPTS:solar_horizon}", "${REDCAL_OPTS:flag_nchan_low}", "${REDCAL_OPTS:flag_nchan_high}", "${REDCAL_OPTS:oc_maxiter}", From 0ab61bfb3732efccd7cff4b6a865ee7e98aa581e Mon Sep 17 00:00:00 2001 From: Josh Dillon Date: Thu, 9 Jul 2020 17:09:09 -0700 Subject: [PATCH 12/22] respecialize for 2458937 --- pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index 95d98515..c5363953 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -4,8 +4,8 @@ path_to_do_scripts = "/users/jsdillon/Libraries/hera_opm/pipelines/h3c/idr2/v1/t source_script = "~/.bashrc" conda_env = "python3" ex_ants_path = "/users/jsdillon/Libraries/hera_opm/pipelines/h3c/idr2/v1/bad_ants" -# base_mem = 32000 -base_mem = 16000 +base_mem = 32000 +# base_mem = 16000 base_cpu = 1 timeout = "24h" @@ -28,8 +28,8 @@ oc_maxiter = 5000 min_bl_cut = 15 max_bl_cut = 100 iter0_prefix = ".iter0" -good_statuses = "passed_checks,needs_checking,digital_ok,digital_maintenance,calibration_maintenance,calibration_triage,calibration_ok" -# good_statuses = "passed_checks,needs_checking,digital_ok,digital_maintenance,calibration_maintenance,calibration_triage,calibration_ok,dish_maintenance,dish_ok,RF_maintenance,RF_ok,known_bad,not_connected" +# good_statuses = "passed_checks,needs_checking,digital_ok,digital_maintenance,calibration_maintenance,calibration_triage,calibration_ok" +good_statuses = "passed_checks,needs_checking,digital_ok,digital_maintenance,calibration_maintenance,calibration_triage,calibration_ok,dish_maintenance,dish_ok,RF_maintenance,RF_ok,known_bad,not_connected" [ABSCAL_OPTS] model_files_glob = "'/lustre/aoc/projects/hera/H3C/abscal_models/RIMEz_v2_No_Outriggers/zen.2458894.?????.uvh5'" @@ -104,7 +104,7 @@ args = ["{basename}", "${ANT_METRICS_OPTS:crossCut}", [REDCAL] prereqs = "ANT_METRICS" -mem = 24000 +# mem = 24000 args = ["{basename}", "${Options:ex_ants_path}", "${REDCAL_OPTS:ant_z_thresh}", "${REDCAL_OPTS:solar_horizon}", "${REDCAL_OPTS:flag_nchan_low}", "${REDCAL_OPTS:flag_nchan_high}", "${REDCAL_OPTS:oc_maxiter}", @@ -131,8 +131,8 @@ args = ["{basename}", "${ABSCAL_OPTS:model_files_glob}", "${ABSCAL_OPTS:nInt_to_ "${ABSCAL_OPTS:phs_conv_crit}", "${ABSCAL_OPTS:edge_cut}"] [XRFI_PRECHUNK] -# mem = 96000 -mem = 64000 +mem = 96000 +# mem = 64000 prereqs = "ABSCAL" args = ["{basename}", "${XRFI_PRECHUNK_OPTS:kt_size}", "${XRFI_PRECHUNK_OPTS:kf_size}", "${XRFI_PRECHUNK_OPTS:sig_init}", "${XRFI_PRECHUNK_OPTS:sig_adj}"] @@ -171,8 +171,8 @@ prereqs = 
"CAL_SMOOTH" n_time_neighbors = "all" stride_length = 1 time_centered = false -# mem = 96000 -mem = 64000 +mem = 96000 +# mem = 64000 args = ["{basename}", "${IMAGING_OPTS:casa}", "${IMAGING_OPTS:casa_imaging_scripts}", "${IMAGING_OPTS:calibration}"] [MAKE_NOTEBOOK] From cdbc7c3b245238ad0238914aa857ac617b06c6ee Mon Sep 17 00:00:00 2001 From: Josh Dillon Date: Thu, 9 Jul 2020 17:25:31 -0700 Subject: [PATCH 13/22] update lst blacklists for new RIMEz for 2458838 --- pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index c5363953..5a93130e 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -63,7 +63,7 @@ alpha = 0.3 freq_threshold = 1.0 time_threshold = 1.0 ant_threshold = 0.5 -lst_blacklists = "0-1.3 1.84-1.9 2.1-2.19 2.27-2.47 2.5-4.3 5.25-5.56 5.59-5.7 5.89-5.96 6.11-6.32 6.5-9.1 9.37-9.44 9.51-9.58 10.9-11.5 11.9-14.3 16.3-1.3" +lst_blacklists = "0-1.3 2.5-4.3 5.0-5.7 6.5-9.1 10.6-11.5 11.9-14.3 16.3-1.3" [IMAGING_OPTS] casa = "/home/casa/packages/RHEL7/release/casa-release-5.1.0-68/bin/casa" From 73c05d2b87a56f316a4b3b7bb4e883b5212f662b Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Wed, 15 Jul 2020 12:53:56 -0700 Subject: [PATCH 14/22] revamp api again to make parameters make more sense --- ...eighbors.toml => nrao_rtp_chunk_size.toml} | 4 +- ..._all.toml => nrao_rtp_chunk_size_all.toml} | 7 +- .../sample_config/nrao_rtp_stride_length.toml | 4 +- hera_opm/mf_tools.py | 181 +++++++++--------- .../bad_configs/bad_example_obsid_list.toml | 2 +- hera_opm/tests/test_mf_tools.py | 101 +++++----- 6 files changed, 142 insertions(+), 157 deletions(-) rename hera_opm/data/sample_config/{nrao_rtp_time_neighbors.toml => nrao_rtp_chunk_size.toml} (96%) rename hera_opm/data/sample_config/{nrao_rtp_time_neighbors_all.toml => nrao_rtp_chunk_size_all.toml} (86%) diff --git a/hera_opm/data/sample_config/nrao_rtp_time_neighbors.toml b/hera_opm/data/sample_config/nrao_rtp_chunk_size.toml similarity index 96% rename from hera_opm/data/sample_config/nrao_rtp_time_neighbors.toml rename to hera_opm/data/sample_config/nrao_rtp_chunk_size.toml index 68ae63a4..f476a38a 100644 --- a/hera_opm/data/sample_config/nrao_rtp_time_neighbors.toml +++ b/hera_opm/data/sample_config/nrao_rtp_chunk_size.toml @@ -29,12 +29,12 @@ args = ["{basename}", "${Options:ex_ants}"] [OMNICAL_METRICS] prereqs = "OMNICAL" -n_curr_time_neighbors = 1 +chunk_size = 3 args = ["{basename}", "{prev_basename}", "{next_basename}"] [OMNI_APPLY] prereqs = "OMNICAL_METRICS" -n_curr_time_neighbors = 1 +chunk_size = 3 stride_length = 2 args = "{basename}" diff --git a/hera_opm/data/sample_config/nrao_rtp_time_neighbors_all.toml b/hera_opm/data/sample_config/nrao_rtp_chunk_size_all.toml similarity index 86% rename from hera_opm/data/sample_config/nrao_rtp_time_neighbors_all.toml rename to hera_opm/data/sample_config/nrao_rtp_chunk_size_all.toml index 2423696f..fd33a580 100644 --- a/hera_opm/data/sample_config/nrao_rtp_time_neighbors_all.toml +++ b/hera_opm/data/sample_config/nrao_rtp_chunk_size_all.toml @@ -15,14 +15,15 @@ actions = ["XRFI"] [XRFI] args = "{basename}" -n_curr_time_neighbors = 'all' +chunk_size = 'all' +stride_length = 'all' [XRFI_CENTERED] args = "{basename}" -n_curr_time_neighbors = 'all' +chunk_size = 'all' time_centered = true [XRFI_NOT_CENTERED] args = "{basename}" -n_curr_time_neighbors = 'all' +chunk_size 
= 'all' time_centered = false diff --git a/hera_opm/data/sample_config/nrao_rtp_stride_length.toml b/hera_opm/data/sample_config/nrao_rtp_stride_length.toml index 8bcb41c3..bf56b2b0 100644 --- a/hera_opm/data/sample_config/nrao_rtp_stride_length.toml +++ b/hera_opm/data/sample_config/nrao_rtp_stride_length.toml @@ -30,12 +30,12 @@ args = ["{basename}", "${Options:ex_ants}"] [OMNICAL_METRICS] prereqs = "OMNICAL" -n_curr_time_neighbors = 1 +chunk_size = 3 args = ["{basename}", "{prev_basename}", "{next_basename}"] [OMNI_APPLY] prereqs = "OMNICAL_METRICS" -n_curr_time_neighbors = 1 +chunk_size = 3 stride_length = 2 # args = ["{basename}", "{obsid_list}"] args = "{basename}" diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index e16c8b61..3171d67f 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -93,11 +93,9 @@ def get_config_entry( "${header:item}". If the corresponding key is not defined in that part of the config file, an error is raised. Default is True. total_length : int, optional - If this parameter belongs to the special group of - [stride_length, n_curr_time_neighbors], + If this parameter is `stride_length`, the entry will be further parsed to interpret 'all', and be replaced - with (total_length - 1) // 2 if time_centered is True (default) or - with (total_length - 1) if time_centered is False. + with `total_length`. Returns ------- @@ -120,15 +118,8 @@ def get_config_entry( entries[i] = _interpolate_config(config, entry) else: entries = _interpolate_config(config, entries) - if item in ["stride_length", "n_curr_time_neighbors"]: - time_centered = get_config_entry( - config, header, "time_centered", required=False - ) - if entries == "all": - if time_centered or time_centered is None: - entries = str((total_length - 1) // 2) - else: - entries = str(total_length - 1) + if (item in ["stride_length"]) and (entries == "all"): + entries = str(total_length) return entries except KeyError: if not required: @@ -199,11 +190,11 @@ def sort_obsids(obsids, jd=None, return_basenames=False): return sorted_obsids -def make_time_neighbor_list( +def make_chunk_list( obsid, action, obsids, - n_time_neighbors=None, + chunk_size=None, time_centered=None, stride_length=None, collect_stragglers=None, @@ -221,18 +212,20 @@ def make_time_neighbor_list( obsids : list of str A list of all obsids for the given day; uses this list (sorted) to define neighbors - n_time_neighbors : str - Number of neighboring time files to append to list. If set to the - string "all", then all neighbors from that JD are added. Default is "0" + chunk_size : str + Number of obsids to include in the list. If set to the + string "all", then all neighbors from that JD are added. Default is "1" + (just the obsid). time_centered : bool, optional - Whether the provided obsid should be in the center of the neighbors. - If True (default), returns n_time_neighbors on either side of obsid. - If False, returns original obsid _and_ n_time_neighbors following. + Whether the provided obsid should be in the center of the chunk. + If True (default), returns (n_chunk - 1) // 2 on either side of obsid. + If n_chunk is even, there will be one more obsid on the left. + If False, returns original obsid _and_ (chunk_size - 1) following. stride_length : str, optional Length of the stride. Default is "1". 
collect_stragglers : bool, optional When the list of files to work on is not divided evenly by the - combination of stride_length and n_time_neighbors, this option specifies + combination of stride_length and n_chunk, this option specifies whether to include the straggler files into the last group (True) or treat them as their own small group (False, default). return_outfiles : bool, optional @@ -240,7 +233,7 @@ def make_time_neighbor_list( Returns ------- - neighbors : list of str + chunk_list : list of str A list of obsids or files (depending on outfile keyword) for time-adjacent neighbors. @@ -248,19 +241,19 @@ def make_time_neighbor_list( ------ ValueError Raised if the specified obsid is not present in the full list, if - `n_time_neighbors` cannot be parsed as an int, or if `n_time_neighbors` - is negative. + `chunk_size` cannot be parsed as an int, or if `chunk_size` + is less than 1. """ if time_centered is None: time_centered = True - if n_time_neighbors is None: - n_time_neighbors = "0" + if chunk_size is None: + chunk_size = "1" if stride_length is None: stride_length = "1" if collect_stragglers is None: collect_stragglers = False - neighbors = [] + chunk_list = [] # extract the integer JD of the current file jd = get_jd(obsid) @@ -273,24 +266,28 @@ def make_time_neighbor_list( except ValueError: raise ValueError("obsid {} not found in list of obsids".format(obsid)) - if n_time_neighbors == "all": + if chunk_size == "all": i0 = 0 i1 = len(obsids) else: # assume we got an integer as a string; try to make sense of it try: - n_time_neighbors = int(n_time_neighbors) + chunk_size = int(chunk_size) except ValueError: - raise ValueError("n_time_neighbors must be parsable as an int") - if n_time_neighbors < 0: - raise ValueError("n_time_neighbors must be an integer >= 0.") - # get n_time_neighbors before and after; make sure we don't have an IndexError - i0 = max(obs_idx - time_centered * n_time_neighbors, 0) - i1 = min(obs_idx + n_time_neighbors + 1, len(obsids)) + raise ValueError("chunk_size must be parsable as an int") + if chunk_size < 1: + raise ValueError("chunk_size must be an integer >= 1.") + # get obsids before and after; make sure we don't have an IndexError + if time_centered: + i0 = max(obs_idx - chunk_size // 2, 0) + i1 = min(obs_idx + (chunk_size + 1) // 2, len(obsids)) + else: + i0 = obs_idx + i1 = min(obs_idx + chunk_size, len(obsids)) n_following = len(obsids) - (obs_idx + int(stride_length)) - if n_following < (n_time_neighbors + 1) and collect_stragglers: - # see _determine_stride_partitioning() for more explanation - gap = (int(stride_length) - 1) - n_time_neighbors * (1 + time_centered) + if (n_following < chunk_size) and collect_stragglers: + # Calculate number of obsids that are skipped between strides + gap = int(stride_length) - chunk_size if gap > 0: warnings.warn( "Collecting stragglers is incompatible with gaps between " @@ -299,15 +296,15 @@ def make_time_neighbor_list( else: i1 = len(obsids) - # build list of neighbors + # build list of obsids for i in range(i0, i1): - neighbors.append(obsids[i]) + chunk_list.append(obsids[i]) # finalize the names of files if return_outfiles: - neighbors = [make_outfile_name(of, action)[0] for of in neighbors] + chunk_list = [make_outfile_name(of, action)[0] for of in chunk_list] - return neighbors + return chunk_list def process_batch_options( @@ -381,7 +378,7 @@ def process_batch_options( def _determine_stride_partitioning( obsids, stride_length=None, - n_curr_time_neighbors=None, + chunk_size=None, time_centered=None, 
collect_stragglers=None, ): @@ -392,16 +389,16 @@ def _determine_stride_partitioning( The list of obsids. stride_length : int, optional Length of the stride. Default is 1. - n_curr_time_neighbors : int, optional - Number of time neighbors. Optional, default is 0. + chunk_size : int, optional + Number of obsids in a chunk. Optional, default is 1. time_centered : bool, optional - Whether to center the obsid and select n_curr_time_neighbors on either side, - returning a total of 2 * n_curr_time_neighbors + 1 obsids (True, default), or - a group starting with the selected obsid and a total of length - n_curr_time_neighbors + 1 (False). + Whether to center the obsid and select chunk_size // 2 on either side + (True, default), or a group starting with the selected obsid (False). + If `time_centered` is True and `chunk_size` is even, there will be + one more obsid to the left. collect_stragglers : bool, optional When the list of files to work on is not divided evenly by the - combination of stride_length and n_curr_time_neighbors, this option specifies + combination of stride_length and chunk_size, this option specifies whether to include the straggler files into the last group (True) or treat them as their own small group (False, default). @@ -419,13 +416,13 @@ def _determine_stride_partitioning( workflow. An obsid may have itself as a primary obsid (e.g., if stride_length == 1, then each obsid will have itself, as well as its time neighbors, as primary obsids). If `stride_length` and - `n_curr_time_neighbors` are such that there are obsids that do not belong to + `chunk_size` are such that there are obsids that do not belong to any group, then the value is an empty list. """ if stride_length is None: stride_length = 1 - if n_curr_time_neighbors is None: - n_curr_time_neighbors = 0 + if chunk_size is None: + chunk_size = 1 if time_centered is None: time_centered = True if collect_stragglers is None: @@ -433,9 +430,9 @@ def _determine_stride_partitioning( obsids = sort_obsids(obsids) try: - n_curr_time_neighbors = int(n_curr_time_neighbors) + chunk_size = int(chunk_size) except ValueError: - raise ValueError("n_curr_time_neighbors must be able to be interpreted as an int.") + raise ValueError("chunk_size must be able to be interpreted as an int.") try: stride_length = int(stride_length) except ValueError: @@ -454,38 +451,37 @@ def _determine_stride_partitioning( primary_obsids = [] per_obsid_primary_obsids = [[] for i in range(len(obsids))] - for idx in range(time_centered * n_curr_time_neighbors, len(obsids), stride_length): + for idx in range(time_centered * (chunk_size // 2), len(obsids), stride_length): # Compute the number of remaining obsids to process. # We account for the location of the next stride to determine if we # should grab straggling obsids. n_following = len(obsids) - (idx + stride_length) if time_centered: - i1 = max(idx - n_curr_time_neighbors, 0) + i0 = max(idx - chunk_size // 2, 0) + i1 = idx + (chunk_size + 1) // 2 else: - i1 = idx - i2 = idx + n_curr_time_neighbors + 1 - # Check to see if i2 would be past the end of the array. If + i0 = idx + i1 = idx + chunk_size + # Check to see if i1 would be past the end of the array. If # `collect_stragglers` is True, then we would have broken out of the # loop on the iteration previous to the current one. Otherwise we drop # the remaining obsids because there are insufficient time neighbors to # make a full set. 
- if i2 > len(obsids): + if i1 > len(obsids): break - if n_following < (n_curr_time_neighbors + 1) and collect_stragglers: + if (n_following < chunk_size) and collect_stragglers: # Figure out if any observations that would normally have been skipped # will be lumped in by getting all remaining observations. - # "stride_length - 1" is the actual number of observations between - # current idx and next one given stride_length. - gap = (stride_length - 1) - n_curr_time_neighbors * (1 + time_centered) + gap = stride_length - chunk_size if gap > 0: warnings.warn( "Collecting stragglers is incompatible with gaps between " "consecutive strides. Not collecting stragglers..." ) else: - i2 = len(obsids) + i1 = len(obsids) primary_obsids.append(obsids[idx]) - for i in range(i1, i2): + for i in range(i0, i1): per_obsid_primary_obsids[i].append(obsids[idx]) # skip what would have been the last iteration, because we've @@ -493,7 +489,7 @@ def _determine_stride_partitioning( break # assign indices primary_obsids.append(obsids[idx]) - for i in range(i1, i2): + for i in range(i0, i1): per_obsid_primary_obsids[i].append(obsids[idx]) return primary_obsids, per_obsid_primary_obsids @@ -503,7 +499,7 @@ def prep_args( args, obsid, obsids=None, - n_curr_time_neighbors="0", + chunk_size="1", stride_length="1", time_centered=None, collect_stragglers=None, @@ -520,16 +516,17 @@ def prep_args( Filename/obsid to be substituted. obsids : list of str, optional Full list of obsids. Required when time-adjacent neighbors are desired. - n_curr_time_neighbors : str - Number of neighboring time files to append to list. If set to the + chunk_size : str + Number of obs files to append to list. If set to the string "all", then all neighbors from that JD are added. stride_length : str Number of files to include in a stride. This interacts with - `n_curr_time_neighbors` to define how arguments are generate. + `chunk_size` to define how arguments are generate. time_centered : bool, optional - Whether the provided obsid should be in the center of the neighbors. - If True (default), returns n_curr_time_neighbors on either side of obsid. - If False, returns original obsid _and_ n_curr_time_neighbors following. + Whether the provided obsid should be in the center of the chunk. + If True (default), returns (n_chunk - 1) // 2 on either side of obsid. + If n_chunk is even, there will be one more obsid on the left. + If False, returns original obsid _and_ (chunk_size - 1) following. collect_stragglers : bool, optional Whether to lump files close to the end of the list ("stragglers") into the previous group, or belong to their own smaller group. 
@@ -596,7 +593,7 @@ def prep_args( _, per_obsid_primary_obsids = _determine_stride_partitioning( obsids, stride_length=stride_length, - n_curr_time_neighbors=n_curr_time_neighbors, + chunk_size=chunk_size, time_centered=time_centered, collect_stragglers=collect_stragglers, ) @@ -751,19 +748,19 @@ def build_analysis_makeflow_from_config( if idx != len(workflow) - 1: raise ValueError("TEARDOWN must be last entry of workflow") - # Check for actions that use n_curr_time_neighbors, make sure obsid_list is last arg + # Check for actions that use chunk_size, make sure obsid_list is last arg for action in workflow: - n_curr_time_neighbors = get_config_entry( - config, action, "n_curr_time_neighbors", required=False, total_length=len(obsids) + chunk_size = get_config_entry( + config, action, "chunk_size", required=False ) - if n_curr_time_neighbors is not None: + if chunk_size is not None: this_args = get_config_entry(config, action, "args", required=True) if "{obsid_list}" in this_args: bn_idx = this_args.index("{obsid_list}") if bn_idx != len(this_args) - 1: raise ValueError( "{obsid_list} must be the last argument for action" - f" {action} because n_curr_time_neighbors is specified." + f" {action} because chunk_size is specified." ) path_to_do_scripts = get_config_entry(config, "Options", "path_to_do_scripts") @@ -915,16 +912,16 @@ def build_analysis_makeflow_from_config( required=False, total_length=len(obsids), ) - n_prev_time_neighbors = get_config_entry( + prereq_chunk_size = get_config_entry( config, action, - "n_prev_time_neighbors", + "prereq_chunk_size", required=False, ) - n_curr_time_neighbors = get_config_entry( + chunk_size = get_config_entry( config, action, - "n_curr_time_neighbors", + "chunk_size", required=False, total_length=len(obsids), ) @@ -944,7 +941,7 @@ def build_analysis_makeflow_from_config( ) = _determine_stride_partitioning( sorted_obsids, stride_length=stride_length, - n_curr_time_neighbors=n_curr_time_neighbors, + chunk_size=chunk_size, time_centered=time_centered, collect_stragglers=collect_stragglers, ) @@ -1020,20 +1017,20 @@ def build_analysis_makeflow_from_config( "workflow".format(prereq, action) ) # add neighbors - prev_neighbors = make_time_neighbor_list( + prev_neighbors = make_chunk_list( filename, prereq, obsids, - n_time_neighbors=n_prev_time_neighbors, + chunk_size=prereq_chunk_size, time_centered=time_centered, stride_length=stride_length, collect_stragglers=collect_stragglers, ) - curr_neighbors = make_time_neighbor_list( + curr_neighbors = make_chunk_list( filename, prereq, obsids, - n_time_neighbors=n_curr_time_neighbors, + chunk_size=chunk_size, time_centered=time_centered, stride_length=stride_length, collect_stragglers=collect_stragglers, @@ -1059,7 +1056,7 @@ def build_analysis_makeflow_from_config( args, filename, obsids=obsids, - n_curr_time_neighbors=n_curr_time_neighbors, + chunk_size=chunk_size, stride_length=stride_length, time_centered=time_centered, collect_stragglers=collect_stragglers, diff --git a/hera_opm/tests/bad_configs/bad_example_obsid_list.toml b/hera_opm/tests/bad_configs/bad_example_obsid_list.toml index 48ce1529..e54d9470 100644 --- a/hera_opm/tests/bad_configs/bad_example_obsid_list.toml +++ b/hera_opm/tests/bad_configs/bad_example_obsid_list.toml @@ -20,7 +20,7 @@ actions = ["SETUP", "ANT_METRICS", "FIRSTCAL", "FIRSTCAL_METRICS", [ANT_METRICS] args = "{obsid_list} {basename}" stride_length = 4 -n_curr_time_neighbors = 2 +chunk_size = 2 [FIRSTCAL] args = ["{basename}", "${Options:ex_ants}"] diff --git 
a/hera_opm/tests/test_mf_tools.py b/hera_opm/tests/test_mf_tools.py index 20921206..7d339e49 100644 --- a/hera_opm/tests/test_mf_tools.py +++ b/hera_opm/tests/test_mf_tools.py @@ -31,11 +31,11 @@ def config_options(): config_dict["config_file"] = os.path.join( DATA_PATH, "sample_config", "nrao_rtp.toml" ) - config_dict["config_file_time_neighbors"] = os.path.join( - DATA_PATH, "sample_config", "nrao_rtp_time_neighbors.toml" + config_dict["config_file_chunk_size"] = os.path.join( + DATA_PATH, "sample_config", "nrao_rtp_chunk_size.toml" ) - config_dict["config_file_time_neighbors_all"] = os.path.join( - DATA_PATH, "sample_config", "nrao_rtp_time_neighbors_all.toml" + config_dict["config_file_chunk_size_all"] = os.path.join( + DATA_PATH, "sample_config", "nrao_rtp_chunk_size_all.toml" ) config_dict["config_file_options"] = os.path.join( DATA_PATH, "sample_config", "nrao_rtp_options.toml" @@ -145,23 +145,10 @@ def test_get_config_entry(config_options): def test_get_config_entry_total_length(config_options): """Test setting a total_length for an entry.""" # retreive config - config = toml.load(config_options["config_file_time_neighbors_all"]) + config = toml.load(config_options["config_file_chunk_size_all"]) - # Default is to time center assert ( - mt.get_config_entry(config, "XRFI", "n_curr_time_neighbors", total_length=15) == "7" - ) - assert ( - mt.get_config_entry( - config, "XRFI_CENTERED", "n_curr_time_neighbors", total_length=21 - ) - == "10" - ) - assert ( - mt.get_config_entry( - config, "XRFI_NOT_CENTERED", "n_curr_time_neighbors", total_length=7 - ) - == "6" + mt.get_config_entry(config, "XRFI", "stride_length", total_length=15) == "15" ) @@ -174,55 +161,55 @@ def test_make_outfile_name(config_options): assert set(mt.make_outfile_name(obsid, action)) == outfiles -def test_make_time_neighbor_list(config_options): +def test_make_chunk_list(config_options): # define args obsid = config_options["obsids"][1] action = "OMNICAL" obsids = config_options["obsids"] outfiles = [obs + ".OMNICAL.out" for obs in obsids[:3]] assert set( - mt.make_time_neighbor_list( - obsid, action, obsids=obsids, n_time_neighbors=1, return_outfiles=True + mt.make_chunk_list( + obsid, action, obsids=obsids, chunk_size=3, return_outfiles=True ) ) == set(outfiles) - # test asking for "all" neighbors + # test asking for "all" obsids assert set( - mt.make_time_neighbor_list( - obsid, action, obsids, n_time_neighbors="all", return_outfiles=True + mt.make_chunk_list( + obsid, action, obsids, chunk_size="all", return_outfiles=True ) ) == set(outfiles) # test edge cases obsid = obsids[0] assert set( - mt.make_time_neighbor_list( - obsid, action, obsids, n_time_neighbors=1, return_outfiles=True + mt.make_chunk_list( + obsid, action, obsids, chunk_size=3, return_outfiles=True ) ) == set(outfiles[:2]) obsid = obsids[2] assert set( - mt.make_time_neighbor_list( - obsid, action, obsids, n_time_neighbors=1, return_outfiles=True + mt.make_chunk_list( + obsid, action, obsids, chunk_size=3, return_outfiles=True ) ) == set(outfiles[1:]) -def test_make_time_neighbor_list_errors(config_options): +def test_make_chunk_list_errors(config_options): # test not having the obsid in the supplied list obsid = "zen.1234567.12345.xx.HH.uvcA" action = "OMNICAL" obsids = config_options["obsids"] with pytest.raises(ValueError): - mt.make_time_neighbor_list(obsid, action, obsids) + mt.make_chunk_list(obsid, action, obsids) - # test passing in nonsense for all_neighbors + # test passing in nonsense for chunk_size with pytest.raises(ValueError): - 
mt.make_time_neighbor_list(obsids[0], action, obsids, n_time_neighbors="blah") + mt.make_chunk_list(obsids[0], action, obsids, chunk_size="blah") - # test passing in a negative number of neighbors + # test passing in a negative number of chunk_size with pytest.raises(ValueError): - mt.make_time_neighbor_list(obsids[0], action, obsids, n_time_neighbors="-1") + mt.make_chunk_list(obsids[0], action, obsids, chunk_size="-1") return @@ -337,7 +324,7 @@ def test_determine_stride_partitioning(config_options): primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( input_obsids, stride_length=1, - n_curr_time_neighbors=2, + chunk_size=5, time_centered=True, collect_stragglers=False, ) @@ -370,8 +357,8 @@ def test_determine_stride_partitioning(config_options): def test_determine_stride_partitioning_defaults(config_options): input_obsids = list(config_options["obsids_long_dummy_list"][:9]) - # run without specifying anything -- defaults to stride of 1 and 0 time - # neighbors + # run without specifying anything -- defaults to stride of 1 and 1 chunk + # size primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( input_obsids, time_centered=True, collect_stragglers=False ) @@ -379,9 +366,9 @@ def test_determine_stride_partitioning_defaults(config_options): target_list = [input_obsids[idx : idx + 1] for idx in range(len(input_obsids))] assert per_obsid_primary_obsids == target_list - # run again with n_time_neighbors = 2 + # run again with chunk_size = 5 primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( - input_obsids, n_curr_time_neighbors=2, time_centered=True, collect_stragglers=False, + input_obsids, chunk_size=5, time_centered=True, collect_stragglers=False, ) # the results should be the same as in test_determine_stride_partitioning assert primary_obsids == list(input_obsids[2:-2]) @@ -407,7 +394,7 @@ def test_determine_stride_partitioning_collect_stragglers(config_options): primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( input_obsids, stride_length=4, - n_curr_time_neighbors=3, + chunk_size=4, time_centered=False, collect_stragglers=True, ) @@ -437,17 +424,17 @@ def test_determine_stride_partitioning_errors(config_options): ValueError, match="stride_length must be able to be interpreted as an int" ): mt._determine_stride_partitioning( - input_obsids, stride_length="foo", n_curr_time_neighbors=1 + input_obsids, stride_length="foo", chunk_size=3 ) with pytest.raises( - ValueError, match="n_curr_time_neighbors must be able to be interpreted as an int" + ValueError, match="chunk_size must be able to be interpreted as an int" ): - mt._determine_stride_partitioning(input_obsids, n_curr_time_neighbors="foo") + mt._determine_stride_partitioning(input_obsids, chunk_size="foo") with pytest.raises(ValueError, match="time_centered must be a boolean variable"): mt._determine_stride_partitioning( - input_obsids, stride_length=1, n_curr_time_neighbors=1, time_centered="False", + input_obsids, stride_length=1, chunk_size=2, time_centered="False", ) with pytest.raises( @@ -456,7 +443,7 @@ def test_determine_stride_partitioning_errors(config_options): mt._determine_stride_partitioning( input_obsids, stride_length=1, - n_curr_time_neighbors=1, + chunk_size=1, time_centered=False, collect_stragglers="True", ) @@ -470,7 +457,7 @@ def test_determine_stride_partitioning_noncontiguous_stragglers(config_options): primary_obsids, per_obsid_primary_obsids = mt._determine_stride_partitioning( input_obsids, 
stride_length=10, - n_curr_time_neighbors=1, + chunk_size=2, time_centered=False, collect_stragglers=True, ) @@ -573,10 +560,10 @@ def test_build_analysis_makeflow_from_config_missing_prereq(config_options): return -def test_build_analysis_makeflow_from_config_time_neighbors(config_options): +def test_build_analysis_makeflow_from_config_chunk_size(config_options): # define args obsids = config_options["obsids"] - config_file = config_options["config_file_time_neighbors"] + config_file = config_options["config_file_chunk_size"] work_dir = os.path.join(DATA_PATH, "test_output") mf_output = os.path.splitext(os.path.basename(config_file))[0] + ".mf" @@ -602,7 +589,7 @@ def test_build_analysis_makeflow_from_config_time_neighbors(config_options): wrapper_fn = "wrapper_" + obsid + "." + action + ".sh" wrapper_fn = os.path.join(work_dir, wrapper_fn) assert os.path.exists(wrapper_fn) - # some actions will not run for edge observations because they need time neighbors. + # some actions will not run for edge observations because they need neighbors. for obsid in obsids[1:-1]: for action in ntime_actions: wrapper_fn = "wrapper_" + obsid + "." + action + ".sh" @@ -1165,7 +1152,7 @@ def test_prep_args_obsid_list(config_options): args, obsid, obsids=obsids_list, - n_curr_time_neighbors="1", + chunk_size="3", time_centered=None, collect_stragglers=False, ) @@ -1185,7 +1172,7 @@ def test_prep_args_obsid_list_centered(config_options): args, obsid, obsids=obsids_list, - n_curr_time_neighbors="1", + chunk_size="3", time_centered=True, collect_stragglers=False, ) @@ -1205,7 +1192,7 @@ def test_prep_args_obsid_list_not_centered(config_options): args, obsid, obsids=obsids_list, - n_curr_time_neighbors="1", + chunk_size="2", time_centered=False, collect_stragglers=False, ) @@ -1225,7 +1212,7 @@ def test_prep_args_obsid_list_with_stragglers(config_options): args, obsid, obsids=obsids_list, - n_curr_time_neighbors="1", + chunk_size="2", stride_length="2", time_centered=False, collect_stragglers=True, @@ -1247,18 +1234,18 @@ def test_prep_args_obsid_list_error(config_options): args, obsid, obsids=obsids_list, - n_curr_time_neighbors="foo", + chunk_size="foo", time_centered=True, collect_stragglers=False, ) - assert str(cm.value).startswith("n_curr_time_neighbors must be able to be interpreted") + assert str(cm.value).startswith("chunk_size must be able to be interpreted") with pytest.raises(ValueError) as cm: args = mt.prep_args( args, obsid, obsids=obsids_list, - n_curr_time_neighbors="1", + chunk_size="3", stride_length="foo", time_centered=True, collect_stragglers=False, From 39f0053e6224bdc49d5e656a1a805e6127907686 Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Wed, 15 Jul 2020 13:07:40 -0700 Subject: [PATCH 15/22] update h3c tomls for new api --- pipelines/h3c/idr2/v1/h3c_idr2_1.toml | 20 ++++++++-------- .../h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 14 +++++------ pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml | 24 +++++++------------ 3 files changed, 25 insertions(+), 33 deletions(-) diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1.toml index 45935c2f..1b53a17c 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1.toml @@ -115,7 +115,7 @@ args = "{basename}" [MAKE_REDCAL_NOTEBOOK] prereqs = "REDCAL" -n_time_neighbors = "all" +chunk_size = "all" mem = 128000 args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] @@ -132,10 +132,10 @@ args = ["${XRFI_OPTS:kt_size}", "${XRFI_OPTS:kf_size}", "${XRFI_OPTS:sig_init}", 
"${XRFI_OPTS:sig_adj}", "{obsid_list}"] # xrfi will drop data within kt_size of time edge. Total integrations read in # must be greater than 2 * kt_size. With 2 integrations per file, and kt_size=8, -# n_time_neighbors must be >= 4 (with centered==True). -# Larger n_time_neighbors will reduce redundant i/o and median filter calculation. +# chunk_size must be >= 9 (with centered==True). +# Larger chunk_size will reduce redundant i/o and median filter calculation. # The numbers below result in about 35% redundancy. -n_time_neighbors = 15 +chunk_size = 31 stride_length = 23 time_centered = true collect_stragglers = true @@ -146,13 +146,13 @@ prereqs = "XRFI" args = ["${XRFI_DAY_THRESHOLD_OPTS:nsig_f}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_f_adj}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t_adj}", "{obsid_list}"] -n_time_neighbors = "all" +chunk_size = "all" stride_length = "all" time_centered = false [CAL_SMOOTH] prereqs = "XRFI_DAY_THRESHOLD" -n_time_neighbors = "all" +chunk_size = "all" mem = 128000 args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_scale}", "${CAL_SMOOTH_OPTS:tol}", "${CAL_SMOOTH_OPTS:filter_mode}", "${CAL_SMOOTH_OPTS:window}", @@ -161,22 +161,22 @@ args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_s [UPDATE_OMNISOL] prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" +chunk_size = "all" args = ["{basename}"] [NOISE] prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" +chunk_size = "all" args = ["{basename}"] [IMAGING] prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" +chunk_size = "all" mem = 96000 args = ["{basename}", "${IMAGING_OPTS:casa}", "${IMAGING_OPTS:casa_imaging_scripts}", "${IMAGING_OPTS:calibration}"] [MAKE_NOTEBOOK] prereqs = "ANT_METRICS" -n_time_neighbors = "all" +chunk_size = "all" mem = 128000 args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index 5a93130e..56fa2a49 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -118,7 +118,7 @@ args = "{basename}" [MAKE_REDCAL_NOTEBOOK] prereqs = "REDCAL" -n_prev_time_neighbors = "all" +prereq_chunk_size = "all" stride_length = 1 time_centered = false mem = 128000 @@ -143,13 +143,13 @@ prereqs = "XRFI_PRECHUNK" args = ["${XRFI_DAY_THRESHOLD_OPTS:nsig_f}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_f_adj}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t_adj}", "{obsid_list}"] -n_curr_time_neighbors = "all" +chunk_size = "all" stride_length = "all" time_centered = false [CAL_SMOOTH] prereqs = "XRFI_DAY_THRESHOLD" -n_prev_time_neighbors = "all" +prereq_chunk_size = "all" mem = 128000 args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_scale}", "${CAL_SMOOTH_OPTS:tol}", "${CAL_SMOOTH_OPTS:filter_mode}", "${CAL_SMOOTH_OPTS:window}", @@ -158,17 +158,17 @@ args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_s [UPDATE_OMNISOL] prereqs = "CAL_SMOOTH" -n_prev_time_neighbors = "all" +prereq_chunk_size = "all" args = ["{basename}"] [NOISE] prereqs = "CAL_SMOOTH" -n_prev_time_neighbors = "all" +prereq_chunk_size = "all" args = ["{basename}"] [IMAGING] prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" +chunk_size = "all" stride_length = 1 time_centered = false mem = 96000 @@ -177,6 +177,6 @@ args = ["{basename}", "${IMAGING_OPTS:casa}", "${IMAGING_OPTS:casa_imaging_scrip [MAKE_NOTEBOOK] prereqs = 
"ANT_METRICS" -n_prev_time_neighbors = "all" +prereq_chunk_size = "all" mem = 128000 args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml b/pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml index d48a871a..eec29268 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml @@ -129,8 +129,7 @@ args = "{basename}" [MAKE_REDCAL_NOTEBOOK] prereqs = "REDCAL" -time_prereqs = "REDCAL" -n_time_neighbors = "all" +chunk_size = "all" mem = 128000 args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] @@ -149,15 +148,13 @@ args = ["{basename}", "${XRFI_OPTS:kt_size}", "${XRFI_OPTS:kf_size}", "${XRFI_OP [XRFI_DAY_THRESHOLD] prereqs = "XRFI" -time_prereqs = "XRFI" -n_time_neighbors = "all" +chunk_size = "all" args = ["{basename}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_f}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_f_adj}", "${XRFI_DAY_THRESHOLD_OPTS:nsig_t_adj}"] [CAL_SMOOTH] prereqs = "XRFI_DAY_THRESHOLD" -time_prereqs = "XRFI_DAY_THRESHOLD" -n_time_neighbors = "all" +chunk_size = "all" mem = 64000 args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_scale}", "${CAL_SMOOTH_OPTS:tol}", "${CAL_SMOOTH_OPTS:filter_mode}", "${CAL_SMOOTH_OPTS:window}", @@ -166,34 +163,29 @@ args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_s [UPDATE_OMNISOL] prereqs = "CAL_SMOOTH" -time_prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" +chunk_size = "all" args = ["{basename}"] [NOISE] prereqs = "CAL_SMOOTH" -time_prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" +chunk_size = "all" args = ["{basename}"] [IMAGING] prereqs = "CAL_SMOOTH" -time_prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" +chunk_size = "all" mem = 96000 args = ["{basename}", "${IMAGING_OPTS:casa}", "${IMAGING_OPTS:casa_imaging_scripts}", "${IMAGING_OPTS:calibration}"] [MAKE_NOTEBOOK] prereqs = "ANT_METRICS" -time_prereqs = "ANT_METRICS" -n_time_neighbors = "all" +chunk_size = "all" mem = 128000 args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] [DELAY] prereqs = "CAL_SMOOTH" -time_prereqs = "CAL_SMOOTH" -n_time_neighbors = "all" +chunk_size = "all" mem = 64000 args = ["{basename}", "${DELAY_OPTS:calibration}", "${DELAY_OPTS:nbls_partial}", "${DELAY_OPTS:output_label}", "${DELAY_OPTS:spw0}", "${DELAY_OPTS:spw1}", "${DELAY_OPTS:tol}", "${DELAY_OPTS:standoff}", "${DELAY_OPTS:cache_dir}"] From 695c0cc8b93cc3f2c507b71f194aaf6390dac11d Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Wed, 15 Jul 2020 13:41:13 -0700 Subject: [PATCH 16/22] fix tomls again --- pipelines/h3c/idr2/v1/h3c_idr2_1.toml | 36 ++++++++++--------- .../h3c/idr2/v1/h3c_idr2_1_prechunk.toml | 21 ++++++----- pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml | 3 ++ .../idr2/v1/task_scripts/do_MAKE_NOTEBOOK.sh | 35 +++++++++--------- .../task_scripts/do_MAKE_REDCAL_NOTEBOOK.sh | 36 +++++++++---------- 5 files changed, 64 insertions(+), 67 deletions(-) diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1.toml index 1b53a17c..98e0e707 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1.toml @@ -62,7 +62,7 @@ alpha = 0.3 freq_threshold = 1.0 time_threshold = 1.0 ant_threshold = 0.5 -lst_blacklists = "0-1.3 1.84-1.9 2.1-2.19 2.27-2.47 2.5-4.3 5.25-5.56 5.59-5.7 5.89-5.96 6.11-6.32 6.5-9.1 9.37-9.44 9.51-9.58 10.9-11.5 11.9-14.3 16.3-1.3" +lst_blacklists = "0-1.3 2.5-4.3 5.0-5.7 6.5-9.1 10.6-11.5 11.9-14.3 16.3-1.3" 
[IMAGING_OPTS] casa = "/home/casa/packages/RHEL7/release/casa-release-5.1.0-68/bin/casa" @@ -76,7 +76,6 @@ actions = ["FIX_DATAFILE", "EXTRACT_AUTOS", "ANT_METRICS", "REDCAL", - "FIRSTCAL_METRICS", "ABSCAL", "XRFI", "XRFI_DAY_THRESHOLD", @@ -91,11 +90,11 @@ actions = ["FIX_DATAFILE", args = "{basename}" [EXTRACT_AUTOS] +prereqs = "FIX_DATAFILE" args = ["{basename}"] [ANT_METRICS] -# prereqs = "FIX_DATAFILE" -mem = 64000 +prereqs = "FIX_DATAFILE" args = ["{basename}", "${ANT_METRICS_OPTS:crossCut}", "${ANT_METRICS_OPTS:deadCut}", "${ANT_METRICS_OPTS:extension}", "${ANT_METRICS_OPTS:vis_format}"] @@ -109,15 +108,13 @@ args = ["{basename}", "${Options:ex_ants_path}", "${REDCAL_OPTS:ant_z_thresh}", "${REDCAL_OPTS:max_bl_cut}", "${REDCAL_OPTS:iter0_prefix}", "${ANT_METRICS_OPTS:extension}", "${REDCAL_OPTS:good_statuses}"] -[FIRSTCAL_METRICS] -prereqs = "REDCAL" -args = "{basename}" - [MAKE_REDCAL_NOTEBOOK] prereqs = "REDCAL" -chunk_size = "all" +prereq_chunk_size = "all" +stride_length = "all" +time_centered = false mem = 128000 -args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] +args = ["{basename}", "${NB_OPTS:basenbdir}"] [ABSCAL] prereqs = "REDCAL" @@ -152,7 +149,10 @@ time_centered = false [CAL_SMOOTH] prereqs = "XRFI_DAY_THRESHOLD" -chunk_size = "all" +prereq_chunk_size = "all" +chunk_size = 1 +stride_length = "all" +time_centered = false mem = 128000 args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_scale}", "${CAL_SMOOTH_OPTS:tol}", "${CAL_SMOOTH_OPTS:filter_mode}", "${CAL_SMOOTH_OPTS:window}", @@ -161,22 +161,24 @@ args = ["{basename}", "${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_s [UPDATE_OMNISOL] prereqs = "CAL_SMOOTH" -chunk_size = "all" +prereq_chunk_size = "all" args = ["{basename}"] [NOISE] prereqs = "CAL_SMOOTH" -chunk_size = "all" +prereq_chunk_size = "all" args = ["{basename}"] [IMAGING] prereqs = "CAL_SMOOTH" -chunk_size = "all" -mem = 96000 +prereq_chunk_size = "all" args = ["{basename}", "${IMAGING_OPTS:casa}", "${IMAGING_OPTS:casa_imaging_scripts}", "${IMAGING_OPTS:calibration}"] [MAKE_NOTEBOOK] prereqs = "ANT_METRICS" -chunk_size = "all" +prereq_chunk_size = "all" +chunk_size = 1 +stride_length = "all" +time_centered = false mem = 128000 -args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] +args = ["{basename}", "${NB_OPTS:basenbdir}"] diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml index 56fa2a49..6fe564fb 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_1_prechunk.toml @@ -77,7 +77,6 @@ actions = ["FIX_DATAFILE", "EXTRACT_AUTOS", "ANT_METRICS", "REDCAL", - "FIRSTCAL_METRICS", "ABSCAL", "XRFI_PRECHUNK", "XRFI_DAY_THRESHOLD", @@ -112,17 +111,13 @@ args = ["{basename}", "${Options:ex_ants_path}", "${REDCAL_OPTS:ant_z_thresh}", "${REDCAL_OPTS:max_bl_cut}", "${REDCAL_OPTS:iter0_prefix}", "${ANT_METRICS_OPTS:extension}", "${REDCAL_OPTS:good_statuses}"] -[FIRSTCAL_METRICS] -prereqs = "REDCAL" -args = "{basename}" - [MAKE_REDCAL_NOTEBOOK] prereqs = "REDCAL" prereq_chunk_size = "all" -stride_length = 1 +stride_length = "all" time_centered = false mem = 128000 -args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] +args = ["{basename}", "${NB_OPTS:basenbdir}"] [ABSCAL] prereqs = "REDCAL" @@ -150,6 +145,9 @@ time_centered = false [CAL_SMOOTH] prereqs = "XRFI_DAY_THRESHOLD" prereq_chunk_size = "all" +chunk_size = 1 +stride_length = "all" +time_centered = false mem = 128000 args = ["{basename}", 
"${CAL_SMOOTH_OPTS:freq_scale}", "${CAL_SMOOTH_OPTS:time_scale}", "${CAL_SMOOTH_OPTS:tol}", "${CAL_SMOOTH_OPTS:filter_mode}", "${CAL_SMOOTH_OPTS:window}", @@ -168,9 +166,7 @@ args = ["{basename}"] [IMAGING] prereqs = "CAL_SMOOTH" -chunk_size = "all" -stride_length = 1 -time_centered = false +prereq_chunk_size = "all" mem = 96000 # mem = 64000 args = ["{basename}", "${IMAGING_OPTS:casa}", "${IMAGING_OPTS:casa_imaging_scripts}", "${IMAGING_OPTS:calibration}"] @@ -178,5 +174,8 @@ args = ["{basename}", "${IMAGING_OPTS:casa}", "${IMAGING_OPTS:casa_imaging_scrip [MAKE_NOTEBOOK] prereqs = "ANT_METRICS" prereq_chunk_size = "all" +chunk_size = 1 +stride_length = "all" +time_centered = false mem = 128000 -args = ["{basename}", "{prev_basename}", "${NB_OPTS:basenbdir}"] +args = ["{basename}", "${NB_OPTS:basenbdir}"] diff --git a/pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml b/pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml index eec29268..71075455 100644 --- a/pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml +++ b/pipelines/h3c/idr2/v1/h3c_idr2_filtering.toml @@ -1,3 +1,6 @@ +NOTE TO E-DUBBS: This file is borked. Talk to Adam or Josh about updating +for new chunking keywords. Or check other tomls. + [Options] makeflow_type = "analysis" path_to_do_scripts = "/users/jsdillon/Libraries/hera_opm/pipelines/h3c/idr2/v1/task_scripts" diff --git a/pipelines/h3c/idr2/v1/task_scripts/do_MAKE_NOTEBOOK.sh b/pipelines/h3c/idr2/v1/task_scripts/do_MAKE_NOTEBOOK.sh index 70b2f3b7..aa6301b4 100755 --- a/pipelines/h3c/idr2/v1/task_scripts/do_MAKE_NOTEBOOK.sh +++ b/pipelines/h3c/idr2/v1/task_scripts/do_MAKE_NOTEBOOK.sh @@ -1,26 +1,23 @@ set -e -if [ $2 == "None" ]; -then - src_dir="$(dirname $0)" - source ${src_dir}/_common.sh - jd=$(get_int_jd ${1}) - OUTPUT=data_inspect_"$jd".ipynb - BASENBDIR=$3 +src_dir="$(dirname $0)" +source ${src_dir}/_common.sh - export DATA_PATH=`pwd` - export JULIANDATE=$jd +jd=$(get_int_jd ${1}) +OUTPUT=data_inspect_"$jd".ipynb +BASENBDIR=$2 - jupyter nbconvert --output=$OUTPUT \ - --to notebook \ - --ExecutePreprocessor.allow_errors=True \ - --ExecutePreprocessor.timeout=-1 \ - --execute ${BASENBDIR}/data_inspect_H3C.ipynb +export DATA_PATH=`pwd` +export JULIANDATE=$jd - # git pull origin master - # git add ${OUTPUT} - # git commit -m "RTP data inspection notebook commit for JD ${jd}" - # git push origin master +jupyter nbconvert --output=$OUTPUT \ +--to notebook \ +--ExecutePreprocessor.allow_errors=True \ +--ExecutePreprocessor.timeout=-1 \ +--execute ${BASENBDIR}/data_inspect_H3C.ipynb -fi +# git pull origin master +# git add ${OUTPUT} +# git commit -m "RTP data inspection notebook commit for JD ${jd}" +# git push origin master diff --git a/pipelines/h3c/idr2/v1/task_scripts/do_MAKE_REDCAL_NOTEBOOK.sh b/pipelines/h3c/idr2/v1/task_scripts/do_MAKE_REDCAL_NOTEBOOK.sh index 96ab6c89..a29ae6e2 100755 --- a/pipelines/h3c/idr2/v1/task_scripts/do_MAKE_REDCAL_NOTEBOOK.sh +++ b/pipelines/h3c/idr2/v1/task_scripts/do_MAKE_REDCAL_NOTEBOOK.sh @@ -1,26 +1,22 @@ set -e -if [ $2 == "None" ]; -then - src_dir="$(dirname $0)" - source ${src_dir}/_common.sh +src_dir="$(dirname $0)" +source ${src_dir}/_common.sh - jd=$(get_int_jd ${1}) - OUTPUT=Redcal_Inspect_"$jd".ipynb - BASENBDIR=$3 +jd=$(get_int_jd ${1}) +OUTPUT=Redcal_Inspect_"$jd".ipynb +BASENBDIR=$2 - export DATA_PATH=`pwd` - export JULIANDATE=$jd +export DATA_PATH=`pwd` +export JULIANDATE=$jd - jupyter nbconvert --output=$OUTPUT \ - --to notebook \ - --ExecutePreprocessor.allow_errors=True \ - --ExecutePreprocessor.timeout=-1 \ - --execute 
${BASENBDIR}/Redcal_Inspect_H3C.ipynb +jupyter nbconvert --output=$OUTPUT \ +--to notebook \ +--ExecutePreprocessor.allow_errors=True \ +--ExecutePreprocessor.timeout=-1 \ +--execute ${BASENBDIR}/Redcal_Inspect_H3C.ipynb - # git pull origin master - # git add ${OUTPUT} - # git commit -m "RTP data inspection notebook commit for JD ${jd}" - # git push origin master - -fi +# git pull origin master +# git add ${OUTPUT} +# git commit -m "RTP data inspection notebook commit for JD ${jd}" +# git push origin master From 8f62b07bd6a9579320ebccd518f5437b75c6382f Mon Sep 17 00:00:00 2001 From: Adam Beardsley Date: Wed, 15 Jul 2020 13:46:21 -0700 Subject: [PATCH 17/22] reinstate 'all' substitution for chunk_size --- hera_opm/mf_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index 3171d67f..ae4a83d3 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -93,7 +93,7 @@ def get_config_entry( "${header:item}". If the corresponding key is not defined in that part of the config file, an error is raised. Default is True. total_length : int, optional - If this parameter is `stride_length`, + If this parameter is in ["stride_length", "chunk_size"], the entry will be further parsed to interpret 'all', and be replaced with `total_length`. @@ -118,7 +118,7 @@ def get_config_entry( entries[i] = _interpolate_config(config, entry) else: entries = _interpolate_config(config, entries) - if (item in ["stride_length"]) and (entries == "all"): + if (item in ["stride_length", "chunk_size"]) and (entries == "all"): entries = str(total_length) return entries except KeyError: From f5ae2931319dc8a47529ca49acbbd706d272b55f Mon Sep 17 00:00:00 2001 From: Josh Dillon Date: Fri, 17 Jul 2020 20:10:36 -0700 Subject: [PATCH 18/22] flag bottom section for 2458838 --- pipelines/h3c/idr2/v1/bad_ants/2458838.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pipelines/h3c/idr2/v1/bad_ants/2458838.txt b/pipelines/h3c/idr2/v1/bad_ants/2458838.txt index 2f680497..756f8a16 100644 --- a/pipelines/h3c/idr2/v1/bad_ants/2458838.txt +++ b/pipelines/h3c/idr2/v1/bad_ants/2458838.txt @@ -7,3 +7,10 @@ 100 116 120 +0 +1 +13 +23 +24 +25 +26 From 15a2a6dd40156555669390b2efb69dd3f6c130c6 Mon Sep 17 00:00:00 2001 From: Paul La Plante Date: Fri, 24 Jul 2020 14:31:47 -0700 Subject: [PATCH 19/22] Format to appease linter --- hera_opm/mf_tools.py | 9 ++------- hera_opm/tests/test_mf_tools.py | 12 +++--------- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/hera_opm/mf_tools.py b/hera_opm/mf_tools.py index ae4a83d3..6c3fa35a 100644 --- a/hera_opm/mf_tools.py +++ b/hera_opm/mf_tools.py @@ -750,9 +750,7 @@ def build_analysis_makeflow_from_config( # Check for actions that use chunk_size, make sure obsid_list is last arg for action in workflow: - chunk_size = get_config_entry( - config, action, "chunk_size", required=False - ) + chunk_size = get_config_entry(config, action, "chunk_size", required=False) if chunk_size is not None: this_args = get_config_entry(config, action, "args", required=True) if "{obsid_list}" in this_args: @@ -913,10 +911,7 @@ def build_analysis_makeflow_from_config( total_length=len(obsids), ) prereq_chunk_size = get_config_entry( - config, - action, - "prereq_chunk_size", - required=False, + config, action, "prereq_chunk_size", required=False, ) chunk_size = get_config_entry( config, diff --git a/hera_opm/tests/test_mf_tools.py b/hera_opm/tests/test_mf_tools.py index 7d339e49..1c4a153e 100644 --- a/hera_opm/tests/test_mf_tools.py 
+++ b/hera_opm/tests/test_mf_tools.py @@ -147,9 +147,7 @@ def test_get_config_entry_total_length(config_options): # retreive config config = toml.load(config_options["config_file_chunk_size_all"]) - assert ( - mt.get_config_entry(config, "XRFI", "stride_length", total_length=15) == "15" - ) + assert mt.get_config_entry(config, "XRFI", "stride_length", total_length=15) == "15" def test_make_outfile_name(config_options): @@ -183,15 +181,11 @@ def test_make_chunk_list(config_options): # test edge cases obsid = obsids[0] assert set( - mt.make_chunk_list( - obsid, action, obsids, chunk_size=3, return_outfiles=True - ) + mt.make_chunk_list(obsid, action, obsids, chunk_size=3, return_outfiles=True) ) == set(outfiles[:2]) obsid = obsids[2] assert set( - mt.make_chunk_list( - obsid, action, obsids, chunk_size=3, return_outfiles=True - ) + mt.make_chunk_list(obsid, action, obsids, chunk_size=3, return_outfiles=True) ) == set(outfiles[1:]) From e95cbedc74c01c0eae2b90e39247f11163fb06da Mon Sep 17 00:00:00 2001 From: Paul La Plante Date: Fri, 24 Jul 2020 14:57:50 -0700 Subject: [PATCH 20/22] Add more testing coverage --- hera_opm/tests/test_mf_tools.py | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/hera_opm/tests/test_mf_tools.py b/hera_opm/tests/test_mf_tools.py index 1c4a153e..8edc545e 100644 --- a/hera_opm/tests/test_mf_tools.py +++ b/hera_opm/tests/test_mf_tools.py @@ -187,6 +187,48 @@ def test_make_chunk_list(config_options): assert set( mt.make_chunk_list(obsid, action, obsids, chunk_size=3, return_outfiles=True) ) == set(outfiles[1:]) + assert set( + mt.make_chunk_list( + obsid, + action, + obsids, + chunk_size=3, + time_centered=False, + return_outfiles=True, + ) + ) == set(outfiles[2:]) + + return + + +def test_make_chunk_list_gap(config_options): + # test having a gap in the chunk list + obsid = config_options["obsids"][1] + action = "OMNICAL" + obsids = config_options["obsids"] + with pytest.warns(UserWarning, match="Collecting stragglers is incompatible"): + chunk_list = mt.make_chunk_list( + obsid, + action, + obsids=obsids, + chunk_size=2, + stride_length=3, + collect_stragglers=True, + ) + assert set(chunk_list) == set(obsids[:2]) + + # do it again with no gap + chunk_list = mt.make_chunk_list( + obsid, + action, + obsids=obsids, + chunk_size=3, + stride_length=3, + collect_stragglers=True, + ) + assert set(chunk_list) == set(obsids) + + return def test_make_chunk_list_errors(config_options): From ae691da9fccfe4e18d46755e271e2176176c5f46 Mon Sep 17 00:00:00 2001 From: Paul La Plante Date: Fri, 24 Jul 2020 15:30:16 -0700 Subject: [PATCH 21/22] Update docs --- docs/config_files.md | 46 +++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/docs/config_files.md b/docs/config_files.md index c2dff80f..38f90a5c 100644 --- a/docs/config_files.md +++ b/docs/config_files.md @@ -71,38 +71,44 @@ chunking keywords listed below are used to determine which files are primary obsids for a given file, and hence which steps must be completed before launching a particular task script. -### n_time_neighbors +### Time Chunking When running a workflow, it is sometimes desirable to operate on several files contiguous in time as a single chunk. There are several options that control how a full list of files is partitioned into a series of time-contiguous chunks that -are all operated on together as a single job in the workflow, referred to as -"time neighbors". 
When evaluating the workflow to determine which obsids to -operate on, the code defines a notion of "primary obsids". For each primary -obsid, a task script is run. Each obsid could be a primary obsid. However, it -is also possible to partition the list such that, e.g., every tenth file is a -primary obsid, and the others do not have corresponding task scripts generated -for them. The specific keywords that may be specified are: - -* `n_time_neighbors`: the number of files that are considered "time neighbors" - for a given primiary obsid. Must be a non-negative integer. Default is 0 - (i.e., no time neighbors will be used unless specified). +are all operated on together as a single job in the workflow, referred to as a +"time chunk". When evaluating the workflow to determine which obsids to operate +on, the code defines a notion of "primary obsids". For each primary obsid, a +task script is run. Each obsid could be a primary obsid. However, it is also +possible to partition the list such that, e.g., every tenth file is a primary +obsid, and the others do not have corresponding task scripts generated for +them. The specific keywords that may be specified are: + +* `chunk_size`: the total size of a given time chunk, in terms of the number of + files. In addition to an integer, can also be the string `"all"` to indicate + the chunk includes all time values. * `time_centered`: whether to treat a chunk of files such that the primary obsid - is in the center, with `n_time_neighbors` on either side for a total length of - 2 * `n_time_neighbors` + 1 (True), or as the start of a chunk of files with - total length of `n_time_neighbors` + 1 (False). Default is True. + is in the center the chunk (True), or the start of the chunk. If + `time_centered` is `True` and `chunk_size` is even, an extra entry is included + on the left to make the chunk symmetric about the chunk center. Default is + `True`. * `stride_length`: the number of obsids to stride by when generating the list of - primary obsids. For example, if `stride_length = 11`, and `n_time_neighbors = - 10`, and `time_centered` is `False`, the list will be partitioned into chunks - 11 files long with no overlap. Default is 1 (i.e., every obsid will be treated - as a primary obsid with the exception of those files within `n_time_neighbors` - of the edge). + primary obsids. For example, if `stride_length = 10`, `chunk_size=10`, and + `time_centered` is `False`, the list will be partitioned into chunks 10 files + long with no overlap. Default is 1 (i.e., every obsid will be treated as a + primary obsid with the exception of those files within `chunk_size` of + the edge). * `collect_stragglers`: determine how to handle lists that are not evenly divided by `stride_length`. If True, any files that would not evenly be added to a full group are instead added to the second-to-last group to make an "extra large" group, ensuring that all files are accounted for when processing. If False, these obsids will not be included in the list. Default is False. +* `prereq_chunk_size`: this option is specified if the user wants to wait for + specific entries in the previous step to finish before starting the current + one, without necessarily using them. Usually this will not be set, or it will + be `"all"` to indicate all entries for the previous step must be completed + before proceeding. 
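A minimal sketch of an action section that combines these keywords (the action name, prereq, and numeric values here are only illustrative, not taken from any shipped pipeline):

```toml
[XRFI]
prereqs = "ABSCAL"
# For actions that set chunk_size, the obsid list is passed as the last argument.
args = ["{obsid_list}"]
# Each chunk spans 31 files, centered on the primary obsid (15 on either side).
chunk_size = 31
# Start a new chunk every 23 files, so adjacent chunks overlap slightly.
stride_length = 23
# Center each chunk on its primary obsid (the default).
time_centered = true
# Fold leftover files at the end of the night into the preceding full chunk
# instead of dropping them.
collect_stragglers = true
```

Mirroring the example above, setting `stride_length = 3`, `chunk_size = 3`, and `time_centered = false` instead would partition a night into back-to-back, non-overlapping three-file chunks, with one task generated per primary obsid.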
### mem From f3499284acebf88545bbd1be30ecd75fc517278e Mon Sep 17 00:00:00 2001 From: Paul La Plante Date: Fri, 24 Jul 2020 15:32:52 -0700 Subject: [PATCH 22/22] Increment version number --- hera_opm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hera_opm/VERSION b/hera_opm/VERSION index 9084fa2f..26aaba0e 100644 --- a/hera_opm/VERSION +++ b/hera_opm/VERSION @@ -1 +1 @@ -1.1.0 +1.2.0