diff --git a/dnn_reco/create_trafo_model.py b/dnn_reco/create_trafo_model.py
index 47a2de5..80d510b 100755
--- a/dnn_reco/create_trafo_model.py
+++ b/dnn_reco/create_trafo_model.py
@@ -83,7 +83,9 @@ def main(config_files):
         )
 
     with open(trafo_config_file, "w") as yaml_file:
-        yaml.dump(config, yaml_file, default_flow_style=False)
+        yaml_dumper = yaml.YAML(typ="full")
+        yaml_dumper.default_flow_style = False
+        yaml_dumper.dump(config, yaml_file)
 
     data_transformer.save_trafo_model(config["trafo_model_path"])
     # kill multiprocessing queues and workers
diff --git a/dnn_reco/data_handler.py b/dnn_reco/data_handler.py
index 633ff0a..d78efc7 100644
--- a/dnn_reco/data_handler.py
+++ b/dnn_reco/data_handler.py
@@ -129,7 +129,7 @@ def setup_with_config(self, config_file):
             Description
         """
         with open(config_file, "r") as stream:
-            config_meta = yaml.safe_load(stream)
+            config_meta = yaml.YAML(typ="safe", pure=True).load(stream)
 
         self.label_names = config_meta["label_names"]
         self.label_name_dict = config_meta["label_name_dict"]
diff --git a/dnn_reco/export_model.py b/dnn_reco/export_model.py
index d6f16fb..0d08903 100755
--- a/dnn_reco/export_model.py
+++ b/dnn_reco/export_model.py
@@ -170,7 +170,9 @@ def main(config_files, output_folder, data_settings, logs):
         "num_misc": data_handler.num_misc,
     }
     with open(os.path.join(output_folder, "config_meta_data.yaml"), "w") as f:
-        yaml.dump(meta_data, f, default_flow_style=False)
+        yaml_dumper = yaml.YAML(typ="full")
+        yaml_dumper.default_flow_style = False
+        yaml_dumper.dump(meta_data, f)
 
     # ------------------------------------
     # Export package versions and git hash
@@ -183,7 +185,9 @@ def main(config_files, output_folder, data_settings, logs):
         "pip_installed_packages": config["pip_installed_packages"],
     }
     with open(os.path.join(output_folder, "version_control.yaml"), "w") as f:
-        yaml.dump(version_control, f, default_flow_style=False)
+        yaml_dumper = yaml.YAML(typ="full")
+        yaml_dumper.default_flow_style = False
+        yaml_dumper.dump(version_control, f)
 
     # -------------------------------
     # Export tensorflow training logs
@@ -213,7 +219,7 @@ def export_data_settings(data_settings, output_folder):
     """
     try:
        with open(data_settings, "r") as stream:
            data_config = yaml.safe_load(stream)
-            data_config = yaml.safe_load(stream)
+            data_config = yaml.YAML(typ="safe", pure=True).load(stream)
     except Exception as e:
         print(e)
         print("Falling back to modified SafeLoader")
@@ -221,7 +227,7 @@ def export_data_settings(data_settings, output_folder):
         yaml.SafeLoader.add_constructor(
             "tag:yaml.org,2002:python/unicode", lambda _, node: node.value
         )
-        data_config = dict(yaml.safe_load(stream))
+        data_config = dict(yaml.YAML(typ="safe", pure=True).load(stream))
 
     for k in [
         "pulse_time_quantiles",
@@ -292,7 +298,9 @@ def export_data_settings(data_settings, output_folder):
     with open(
         os.path.join(output_folder, "config_data_settings.yaml"), "w"
     ) as f:
-        yaml.dump(data_settings, f, default_flow_style=False)
+        yaml_dumper = yaml.YAML(typ="full")
+        yaml_dumper.default_flow_style = False
+        yaml_dumper.dump(data_settings, f)
 
 
 if __name__ == "__main__":
diff --git a/dnn_reco/ic3/modules.py b/dnn_reco/ic3/modules.py
index e8fcabe..06c6ab4 100644
--- a/dnn_reco/ic3/modules.py
+++ b/dnn_reco/ic3/modules.py
@@ -124,7 +124,7 @@ def Configure(self):
         # ----------------------------------------------------------------
         cfg_file = os.path.join(self._model_path, "config_data_settings.yaml")
         with open(cfg_file, "r") as stream:
-            data_config = yaml.safe_load(stream)
+            data_config = yaml.YAML(typ="safe", pure=True).load(stream)
 
         # Backwards compatibility for older exported models which did not
         # include this setting. In this case the separated format, e.g.
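Note on the pattern used throughout the hunks above: with `yaml` bound to the ruamel.yaml package, YAML(typ="safe", pure=True) provides a pure-Python safe loader (the replacement for yaml.safe_load), while YAML(typ="full") is a dump-only writer that can represent arbitrary Python objects. Unlike the old module-level yaml.dump, the instance method YAML.dump(data, stream) accepts no default_flow_style keyword; it is an attribute of the YAML instance. A minimal sketch of the round trip, assuming `from ruamel import yaml` (data and file name purely illustrative):

    from ruamel import yaml

    config = {"trafo_model_path": "/tmp/trafo_model.npy"}  # illustrative

    # Dump-only writer: typ="full" has no loader counterpart.
    yaml_dumper = yaml.YAML(typ="full")
    yaml_dumper.default_flow_style = False
    with open("example.yaml", "w") as f:
        yaml_dumper.dump(config, f)

    # Pure-Python safe loader: the replacement for yaml.safe_load(stream).
    with open("example.yaml", "r") as f:
        loaded = yaml.YAML(typ="safe", pure=True).load(f)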
diff --git a/dnn_reco/model.py b/dnn_reco/model.py
index cf0025d..b069d7e 100644
--- a/dnn_reco/model.py
+++ b/dnn_reco/model.py
@@ -140,9 +140,9 @@ def _setup_training_config_saver(self):
 
         # Load training iterations dict
         if os.path.isfile(self._training_steps_file):
-            self._training_iterations_dict = yaml.safe_load(
-                open(self._training_steps_file)
-            )
+            with open(self._training_steps_file, "r") as stream:
+                yaml_loader = yaml.YAML(typ="safe", pure=True)
+                self._training_iterations_dict = yaml_loader.load(stream)
         else:
             misc.print_warning(
                 "Did not find {!r}. Creating new one".format(
@@ -1134,13 +1134,13 @@ def _save_training_config(self, iteration):
             del training_config["tf_float_precision"]
 
             with open(self._training_config_file, "w") as yaml_file:
-                yaml.dump(training_config, yaml_file, default_flow_style=False)
+                yaml_dumper = yaml.YAML(typ="full")
+                yaml_dumper.default_flow_style = False
+                yaml_dumper.dump(training_config, yaml_file)
 
         # update number of training iterations in training_steps.yaml
         self._training_iterations_dict[self._training_step] = iteration
         with open(self._training_steps_file, "w") as yaml_file:
-            yaml.dump(
-                self._training_iterations_dict,
-                yaml_file,
-                default_flow_style=False,
-            )
+            yaml_dumper = yaml.YAML(typ="full")
+            yaml_dumper.default_flow_style = False
+            yaml_dumper.dump(self._training_iterations_dict, yaml_file)
diff --git a/dnn_reco/modules/models/general_IC86_models.py b/dnn_reco/modules/models/general_IC86_models.py
index 3d3d307..2302d06 100644
--- a/dnn_reco/modules/models/general_IC86_models.py
+++ b/dnn_reco/modules/models/general_IC86_models.py
@@ -99,7 +99,9 @@ def general_model_IC86(
 
     # apply DOM dropout, split and reshape DeepCore input
     X_IC78, X_DeepCore_upper, X_DeepCore_lower = preprocess_icecube_data(
-        is_training, shared_objects
+        is_training,
+        shared_objects,
+        seed=config["tf_random_seed"],
     )
 
     # -----------------------------------
@@ -111,6 +113,7 @@ def general_model_IC86(
         name="Upper DeepCore",
         method_list="convolution",
         keep_prob=keep_prob_list[1],
+        seed=config["tf_random_seed"],
         **config["conv_upper_DeepCore_settings"]
     )
 
@@ -123,6 +126,7 @@ def general_model_IC86(
         name="Lower DeepCore",
         method_list="convolution",
         keep_prob=keep_prob_list[1],
+        seed=config["tf_random_seed"],
         **config["conv_lower_DeepCore_settings"]
     )
 
@@ -135,6 +139,7 @@ def general_model_IC86(
         is_training=is_training,
         method_list="hex_convolution",
         keep_prob=keep_prob_list[1],
+        seed=config["tf_random_seed"],
         **config["conv_IC78_settings"]
     )
 
@@ -159,7 +164,11 @@ def general_model_IC86(
     )
 
     # dropout
-    layer_flat = tf.nn.dropout(layer_flat, rate=1 - (keep_prob_list[2]))
+    layer_flat = tf.nn.dropout(
+        layer_flat,
+        rate=1 - (keep_prob_list[2]),
+        seed=config["tf_random_seed"],
+    )
 
     # -----------------------------------
     # fully connected layers
@@ -171,6 +180,7 @@ def general_model_IC86(
         input=layer_flat,
         keep_prob=keep_prob_list[3],
         is_training=is_training,
+        seed=config["tf_random_seed"],
         **fc_settings
     )
 
@@ -306,6 +316,7 @@ def general_model_IC86(
         input=unc_input,
         is_training=is_training,
         keep_prob=keep_prob_list[3],
+        seed=config["tf_random_seed"],
         **fc_unc_settings
     )
     y_unc_pred_trafo = uncertainty_layers[-1]
@@ -382,7 +393,9 @@ def general_model_IC86_opt4(
 
     # apply DOM dropout, split and reshape DeepCore input
     X_IC78, X_DeepCore_upper, X_DeepCore_lower = preprocess_icecube_data(
-        is_training, shared_objects
+        is_training,
+        shared_objects,
+        seed=config["tf_random_seed"],
     )
 
     # -----------------------------------
@@ -394,6 +407,7 @@ def general_model_IC86_opt4(
         name="Upper DeepCore",
         method_list="convolution",
         keep_prob=keep_prob_list[1],
+        seed=config["tf_random_seed"],
         **config["conv_upper_DeepCore_settings"]
     )
 
@@ -406,6 +420,7 @@ def general_model_IC86_opt4(
         name="Lower DeepCore",
         method_list="convolution",
         keep_prob=keep_prob_list[1],
+        seed=config["tf_random_seed"],
         **config["conv_lower_DeepCore_settings"]
     )
 
@@ -418,6 +433,7 @@ def general_model_IC86_opt4(
         is_training=is_training,
         method_list="hex_convolution",
         keep_prob=keep_prob_list[1],
+        seed=config["tf_random_seed"],
         **config["conv_IC78_settings"]
     )
 
@@ -442,7 +458,11 @@ def general_model_IC86_opt4(
     )
 
     # dropout
-    layer_flat = tf.nn.dropout(layer_flat, rate=1 - (keep_prob_list[2]))
+    layer_flat = tf.nn.dropout(
+        layer_flat,
+        rate=1 - (keep_prob_list[2]),
+        seed=config["tf_random_seed"],
+    )
 
     # -----------------------------------
     # fully connected layers
@@ -454,6 +474,7 @@ def general_model_IC86_opt4(
         input=layer_flat,
         keep_prob=keep_prob_list[3],
         is_training=is_training,
+        seed=config["tf_random_seed"],
         **fc_settings
     )
 
@@ -632,6 +653,7 @@ def general_model_IC86_opt4(
         input=tf.stop_gradient(layer_flat),
         is_training=is_training,
         keep_prob=keep_prob_list[3],
+        seed=config["tf_random_seed"],
         **fc_unc_settings
     )
     y_unc_pred_trafo = uncertainty_layers[-1]
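The recurring seed=config["tf_random_seed"] arguments are threaded through the model definitions above and the preprocessing utilities below so that every stochastic op draws from one configured seed. A minimal sketch of the idea, assuming a config dict with a tf_random_seed entry; seeded_dropout is a hypothetical helper, not part of this patch:

    import tensorflow as tf

    config = {"tf_random_seed": 42}  # illustrative

    def seeded_dropout(x, keep_prob, seed=None):
        # A fixed operation-level seed pins the dropout masks drawn in a
        # given graph, so independently started runs become comparable.
        return tf.nn.dropout(x, rate=1 - keep_prob, seed=seed)

    x = tf.ones([4, 8])
    y = seeded_dropout(x, keep_prob=0.9, seed=config["tf_random_seed"])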
diff --git a/dnn_reco/modules/models/utils/model_utils.py b/dnn_reco/modules/models/utils/model_utils.py
index 0a3efdb..adf009b 100644
--- a/dnn_reco/modules/models/utils/model_utils.py
+++ b/dnn_reco/modules/models/utils/model_utils.py
@@ -6,7 +6,7 @@
 import tensorflow as tf
 
 
-def preprocess_icecube_data(is_training, shared_objects):
+def preprocess_icecube_data(is_training, shared_objects, seed=None):
     """Performs some basic preprocessing of IceCube input data.
 
     Applies drop out for whole DOMs.
@@ -20,6 +20,8 @@ def preprocess_icecube_data(is_training, shared_objects):
     shared_objects : dict
         A dictionary containing settings and objects that are shared
         and passed on to sub modules.
+    seed : int, optional
+        Random seed for reproducibility.
 
     Returns
     -------
@@ -56,12 +58,14 @@ def preprocess_icecube_data(is_training, shared_objects):
         X_IC78,
         rate=1 - (keep_prob_list[0]),
         noise_shape=noise_shape_IC78,
+        seed=seed,
     )
 
     X_DeepCore = tf.nn.dropout(
         X_DeepCore,
         rate=1 - (keep_prob_list[0]),
         noise_shape=noise_shape_DeepCore,
+        seed=seed,
     )
 
     # -----------------------------------
diff --git a/dnn_reco/setup_manager.py b/dnn_reco/setup_manager.py
index 3e469c7..8255e35 100644
--- a/dnn_reco/setup_manager.py
+++ b/dnn_reco/setup_manager.py
@@ -235,7 +235,9 @@ def _setup_config(self):
             else:
                 config_name += "__" + file_base_name
 
-            config_update = yaml.safe_load(open(config_file))
+            with open(config_file, "r") as stream:
+                config_update = yaml.YAML(typ="safe", pure=True).load(stream)
+
             duplicates = set(new_config.keys()).intersection(
                 set(config_update.keys())
             )
diff --git a/tests_manual/test.py b/tests_manual/test.py
index 334adfc..27c2283 100644
--- a/tests_manual/test.py
+++ b/tests_manual/test.py
@@ -5,6 +5,9 @@
 import numpy as np
 
 
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
 class bcolors:
     HEADER = "\033[95m"
     OKBLUE = "\033[94m"
@@ -33,9 +36,16 @@ def error(msg):
     "NuGen/NuTau/medium_energy/IC86_2013_holeice_30_v4/l5/1/DNN_l5_00000001.hdf5",
 ]
 
-keys = [
+keys_warning = [
     # sanity checks
     "I3EventHeader",
+    # ic3-labels labels
+    "LabelsDeepLearning",
+    "LabelsMCCascade",
+    "MCCascade",
+]
+
+keys_error = [
     # ic3-data input data to dnn_reco
     "dnn_data__charge_bins_bin_values",
     "dnn_data__charge_bins_bin_indices",
@@ -47,10 +57,6 @@ def error(msg):
     "dnn_data_inputs9_InIceDSTPulses_bin_values",
     "dnn_data_inputs9_InIceDSTPulses_bin_indices",
     "dnn_data_inputs9_InIceDSTPulses_global_time_offset",
-    # ic3-labels labels
-    "LabelsDeepLearning",
-    "LabelsMCCascade",
-    "MCCascade",
     # dnn_reco results
     "DeepLearningReco_event_selection_cscdl3_300m_01",
     "DeepLearningReco_event_selection_cascade_monopod_starting_events_big_kernel_02",
@@ -58,20 +64,23 @@ def error(msg):
     "DeepLearningReco_event_selection_upgoing_tracks_03",
     "DeepLearningReco_dnn_reco_paper_hese__m7_after_sys",
 ]
-dir_original = "test_data/dnn_reco_test_01_base_v1_0_1_dev"
-test_dirs = glob.glob("test_data/*")
+dir_original = os.path.join(
+    SCRIPT_DIR, "test_data/dnn_reco_test_01_base_v1_0_1_dev"
+)
+test_dirs = glob.glob(os.path.join(SCRIPT_DIR, "test_data/*"))
 test_dirs.remove(dir_original)
 
 if len(test_dirs) == 0:
     raise ValueError("No test directories found!")
+warnings = []
 got_warning = False
 passed_test = True
 for dir_test in test_dirs:
     print("\nNow testing {!r} against {!r}".format(dir_test, dir_original))
     for file_name in files:
         print("\n\tNow testing {!r}".format(file_name))
-        for key in keys:
+        for key in keys_warning + keys_error:
             try:
                 df_original = pd.read_hdf(
                     os.path.join(dir_original, file_name), key=key
                 )
@@ -87,26 +96,57 @@ def error(msg):
             assert (df_original.columns == df_test.columns).all()
             for k in df_original.columns:
                 if "runtime" not in k:
+
+                    # set tolerances
+                    atol = 5e-6
+                    rtol = 5e-4
+                    rtol_fatal = rtol
+
                     if not np.allclose(
                         df_original[k].values,
                         df_test[k].values,
-                        atol=5e-6,
-                        rtol=5e-4,
+                        atol=atol,
+                        rtol=rtol,
                     ):
-                        if key == "LabelsDeepLearning":
+                        # compute relative difference
+                        diff = df_original[k].values - df_test[k].values
+                        rel_diff = diff / np.abs(df_original[k].values)
+                        rel_diff_max = np.max(np.abs(rel_diff))
+                        warnings.append(
+                            [
+                                key,
+                                k,
+                                rel_diff_max,
+                                rel_diff_max > rtol_fatal
+                                and key in keys_error,
+                            ]
+                        )
+
+                        mask = np.abs(rel_diff) > rtol
+
+                        if key in keys_warning:
                             warning("\t\tWarning: mismatch for {}".format(k))
                             got_warning = True
+                        elif key in keys_error:
+                            if rel_diff_max > rtol_fatal:
+                                passed_test = False
+                                error("\t\tError: mismatch for {}".format(k))
+                            else:
+                                warning(
+                                    "\t\tWarning: mismatch for {}".format(k)
+                                )
+                                got_warning = True
                         else:
-                            error("\t\tError: mismatch for {}".format(k))
-                            passed_test = False
-                        print(
-                            "\t\t",
-                            key,
-                            k,
-                            (df_original[k].values - df_test[k].values),
-                        )
-                        print("\t\t", df_original[k].values)
-                        print("\t\t", df_test[k].values)
+                            raise KeyError("Unknown key {!r}".format(key))
+                        print(f"\t\tKey: {key} | column: {k}")
+                        print("\t\tElement-wise difference:")
+                        print("\t\t", diff[mask])
+                        print("\t\tRelative difference:")
+                        print("\t\t", rel_diff[mask])
+                        print("\t\tOriginal:")
+                        print("\t\t", df_original[k].values[mask])
+                        print("\t\tTest:")
+                        print("\t\t", df_test[k].values[mask])
                 else:
                     runtime_orig = np.mean(df_original[k].values) * 1000.0
                     runtime_orig_std = np.std(df_original[k].values) * 1000.0
@@ -125,6 +165,23 @@ def error(msg):
                         )
                     )
 
+# print warnings
+if len(warnings) > 0:
+    max_chars = 25
+    print(f"\n{'Rel. diff.':8s} | {'Key':25s} | Column")
+    print("=" * (max_chars * 2 + 16))
+    for key, k, max_rel_diff, fatal in warnings:
+        if len(k) > max_chars:
+            k = k[:3] + "..." + k[-(max_chars - 6) :]
+        if len(key) > max_chars:
+            key = key[:3] + "..." + key[-(max_chars - 6) :]
+
+        msg = f"{max_rel_diff*100.:9.3f}% | {key:25s} | {k}"
+        if fatal:
+            print(bcolors.FAIL + msg + bcolors.ENDC)
+        else:
+            print(bcolors.WARNING + msg + bcolors.ENDC)
+
 print("\n====================")
 print("=== Summary ========")
 print("====================")
diff --git a/tests_manual/test_data_cfg.yaml b/tests_manual/test_data_cfg.yaml
index 1c43fa1..1a21a45 100644
--- a/tests_manual/test_data_cfg.yaml
+++ b/tests_manual/test_data_cfg.yaml
@@ -116,7 +116,6 @@ datasets:
 # -------------------------------------------------------------
 job_template: job_templates/cvmfs_python.sh
 script_name: general_i3_processing.py
-cuda_home: /data/user/mhuennefeld/software/cuda/cuda-11.2
 
 # add optional additions to the LD_LIBRARY_PATH
 # Note: '{ld_library_path_prepends}' is the default which does not add anything
@@ -124,7 +123,7 @@ ld_library_path_prepends: '{ld_library_path_prepends}'
 
 # Defines environment variables that are set from python
 set_env_vars_from_python: {
-    # 'TF_DETERMINISTIC_OPS': '1',
+    'TF_DETERMINISTIC_OPS': '1',
 }
 
 #-----------------------------------------------
@@ -155,8 +154,8 @@ processing_steps: [
        # Define environment for this processing step
        cvmfs_python: py2-v3.0.1,
        icetray_metaproject: simulation/V06-01-01,
-        python_user_base_cpu: /data/user/pgutjahr/software/virtual_envs/py2-v3.0.1,
-        python_user_base_gpu: /data/user/pgutjahr/software/virtual_envs/py2-v3.0.1,
+        python_user_base_cpu: /data/user/mhuennefeld/DNN_reco/virtualenvs/py2-v3.0.1,
+        python_user_base_gpu: /data/user/mhuennefeld/DNN_reco/virtualenvs/py2-v3.0.1,
 
        n_files_is_n_runs: True,
 
@@ -175,10 +174,20 @@ processing_steps: [
    # ----------------------------------
    {
        # Define environment for this processing step
+
+        # # newer version
+        # cvmfs_python: py3-v4.3.0,
+        # icetray_metaproject: icetray/v1.10.0,
+        # python_user_base_cpu: /data/user/mhuennefeld/DNN_reco/virtualenvs/tensorflow_gpu_py3-v4.3.0,
+        # python_user_base_gpu: /data/user/mhuennefeld/DNN_reco/virtualenvs/tensorflow_gpu_py3-v4.3.0,
+        # cuda_home: /data/user/mhuennefeld/software/cuda/cuda-11.8,
+
+        # baseline
        cvmfs_python: py3-v4.2.1,
        icetray_metaproject: icetray/v1.5.1,
        python_user_base_cpu: /data/user/mhuennefeld/DNN_reco/virtualenvs/tensorflow_gpu_py3-v4.2.1,
        python_user_base_gpu: /data/user/mhuennefeld/DNN_reco/virtualenvs/tensorflow_gpu_py3-v4.2.1,
+        cuda_home: /data/user/mhuennefeld/software/cuda/cuda-11.2,
 
        # define a list of tray segments to run
        tray_segments: [
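The config change above enables TF_DETERMINISTIC_OPS for the processing jobs. This environment variable (introduced around TF 2.1; TF 2.9+ instead exposes tf.config.experimental.enable_op_determinism()) selects deterministic GPU kernels, which together with the seeded dropout makes reruns of the test chain directly comparable. A minimal sketch, assuming the variable is applied at process start:

    import os

    # Must be set before any TensorFlow op runs; the job template above
    # applies it via set_env_vars_from_python.
    os.environ["TF_DETERMINISTIC_OPS"] = "1"

    import tensorflow as tf  # noqa: E402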