From 58ac478cdb066214b488be14e58d8ef90f2a2ec4 Mon Sep 17 00:00:00 2001 From: Henry Leung Date: Wed, 3 Jan 2024 11:41:42 -0500 Subject: [PATCH] test both tensorflow and torch --- .github/workflows/ci_tests.yml | 27 +++++------ astroNN/config.py | 5 ++- astroNN/models/base_bayesian_cnn.py | 69 ++++++----------------------- astroNN/models/base_master_nn.py | 17 ++----- astroNN/models/base_vae.py | 28 +++++------- astroNN/nn/layers.py | 5 +-- astroNN/nn/utilities/generator.py | 8 ++-- astroNN/shared/nn_tools.py | 28 +++++++++++- setup.py | 2 - tests/test_apogee_model.py | 8 ++-- tests/test_models.py | 2 +- 11 files changed, 80 insertions(+), 119 deletions(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 9b553b0b..5ce67f5f 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -5,19 +5,19 @@ on: [push, pull_request] jobs: test: runs-on: ubuntu-latest - env: - SDSS_LOCAL_SAS_MIRROR: ./ci_data/ - GAIA_TOOLS_DATA: ./ci_data/ - LASMOT_DR5_DATA: ./ci_data/ - KERAS_BACKEND: torch strategy: fail-fast: false matrix: include: - - { TORCH_VER: 2.1.0, PAPER_MODELS: true, ALLOW_FAILURE: false } - - { TORCH_VER: 2.1.0, PAPER_MODELS: false, ALLOW_FAILURE: false } - - { TORCH_VER: 2.0.0, PAPER_MODELS: true, ALLOW_FAILURE: false } - - { TORCH_VER: 2.0.0, PAPER_MODELS: false, ALLOW_FAILURE: false } + - { BACKEND: torch, BACKEND_VER: 2.1.0, PAPER_MODELS: true, ALLOW_FAILURE: false } + - { BACKEND: torch, BACKEND_VER: 2.1.0, PAPER_MODELS: false, ALLOW_FAILURE: false } + - { BACKEND: tensorflow, BACKEND_VER: 2.15.0, PAPER_MODELS: true, ALLOW_FAILURE: false } + - { BACKEND: tensorflow, BACKEND_VER: 2.15.0, PAPER_MODELS: false, ALLOW_FAILURE: false } + env: + SDSS_LOCAL_SAS_MIRROR: ./ci_data/ + GAIA_TOOLS_DATA: ./ci_data/ + LASMOT_DR5_DATA: ./ci_data/ + KERAS_BACKEND: ${{ matrix.BACKEND }} steps: - uses: actions/checkout@v4 - name: Set up Python @@ -26,14 +26,14 @@ jobs: python-version: "3.10" - name: Install dependencies run: | - 
pip install torch~=${{ matrix.TORCH_VER }} + pip install ${{ matrix.BACKEND }}~=${{ matrix.BACKEND_VER }} pip install keras coveralls pydot graphviz pytest pytest-cov pip install . - name: Cache test data uses: actions/cache@v3 with: path: ${{github.workspace}}/ci_data/ - key: astronn-${{ runner.os }}-${{ matrix.TORCH_VER }}-${{ matrix.PAPER_MODELS }}-2023april30 + key: astronn-${{ runner.os }}-${{ matrix.BACKEND_VER }}-${{ matrix.PAPER_MODELS }}-2023april30 - name: Test astroNN continue-on-error: ${{ matrix.ALLOW_FAILURE }} if: ${{ matrix.PAPER_MODELS == false}} @@ -54,11 +54,12 @@ jobs: continue-on-error: ${{ matrix.ALLOW_FAILURE }} uses: codecov/codecov-action@v3 env: - TORCH_VER: ${{ matrix.TORCH_VER }} + BACKEND: ${{ matrix.BACKEND }} + BACKEND_VER: ${{ matrix.BACKEND_VER }} PAPER_MODELS: ${{ matrix.PAPER_MODELS }} with: flags: unittests - env_vars: TORCH_VER, PAPER_MODELS + env_vars: BACKEND, BACKEND_VER, PAPER_MODELS name: codecov-astroNN fail_ci_if_error: true verbose: true diff --git a/astroNN/config.py b/astroNN/config.py index 9660d1c7..cef50236 100644 --- a/astroNN/config.py +++ b/astroNN/config.py @@ -8,9 +8,10 @@ astroNN_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".astroNN") _astroNN_MODEL_NAME = "model_weights.keras" # default astroNN model filename +_KERAS_BACKEND = keras.backend.backend() -if keras.backend.backend() != "torch": - raise ImportError(f"astroNN only support PyTorch backend, currently you have '{keras.backend.backend()}' as backend") +if _KERAS_BACKEND not in ("torch", "tensorflow"): + raise ImportError(f"astroNN only support Tensorflow and PyTorch backend, currently you have '{keras.backend.backend()}' as backend") def config_path(flag=None): diff --git a/astroNN/models/base_bayesian_cnn.py b/astroNN/models/base_bayesian_cnn.py index 9e24c679..767f1446 100644 --- a/astroNN/models/base_bayesian_cnn.py +++ b/astroNN/models/base_bayesian_cnn.py @@ -25,7 +25,6 @@ from astroNN.nn.utilities import Normalizer from 
astroNN.nn.utilities.generator import GeneratorMaster from astroNN.shared.warnings import deprecated, deprecated_copy_signature -from astroNN.shared.nn_tools import gpu_availability from astroNN.shared.dict_tools import dict_np_to_dict_list, list_to_dict from astroNN.nn.losses import ( @@ -544,7 +543,6 @@ def fit( inputs_err=None, labels_err=None, sample_weight=None, - experimental=False, ): """ Train a Bayesian neural network @@ -617,42 +615,15 @@ def fit( start_time = time.time() - if experimental: - dataset = ( - tf.data.Dataset.from_tensor_slices( - (norm_data_training, norm_labels_training, sample_weight_training) - ) - .batch(self.batch_size) - .shuffle(5000, reshuffle_each_iteration=True) - .prefetch(tf.data.AUTOTUNE) - ) - val_dataset = ( - tf.data.Dataset.from_tensor_slices( - (norm_data_val, norm_labels_val, sample_weight_val) - ) - .batch(self.batch_size) - .prefetch(tf.data.AUTOTUNE) - ) - - self.history = self.keras_model.fit( - dataset, - validation_data=val_dataset, - epochs=self.max_epochs, - verbose=self.verbose, - workers=os.cpu_count() // 2, - callbacks=self.__callbacks, - use_multiprocessing=MULTIPROCESS_FLAG, - ) - else: - self.history = self.keras_model.fit( - self.training_generator, - validation_data=self.validation_generator, - epochs=self.max_epochs, - verbose=self.verbose, - workers=os.cpu_count() // 2, - callbacks=self.__callbacks, - use_multiprocessing=MULTIPROCESS_FLAG, - ) + self.history = self.keras_model.fit( + self.training_generator, + validation_data=self.validation_generator, + epochs=self.max_epochs, + verbose=self.verbose, + workers=os.cpu_count() // 2, + callbacks=self.__callbacks, + use_multiprocessing=MULTIPROCESS_FLAG, + ) print(f"Completed Training, {(time.time() - start_time):.{2}f}s in total") if self.autosave is True: @@ -815,15 +786,8 @@ def predict(self, input_data, inputs_err=None, batch_size=None): """ self.has_model_check() - if gpu_availability() is False and self.mc_num > 25: - warnings.warn( - f"You are using 
CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can " - f"potentially be very slow! \n " - f"A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n" - f"This is just a warning, and will not shown if mc_num < 25 on CPU" - ) - if self.mc_num < 2: - raise AttributeError("mc_num cannot be smaller than 2") + if self.mc_num < 2: + raise AttributeError("mc_num cannot be smaller than 2") # if no error array then just zeros if inputs_err is None: @@ -1026,15 +990,8 @@ def on_epoch_end(self): self.has_model_check() - if gpu_availability() is False and self.mc_num > 25: - warnings.warn( - f"You are using CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can " - f"potentially be very slow! \n " - f"A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n" - f"This is just a warning, and will not shown if mc_num < 25 on CPU" - ) - if self.mc_num < 2: - raise AttributeError("mc_num cannot be smaller than 2") + if self.mc_num < 2: + raise AttributeError("mc_num cannot be smaller than 2") total_test_num = len(file) # Number of testing data diff --git a/astroNN/models/base_master_nn.py b/astroNN/models/base_master_nn.py index 831b80b7..e52254eb 100644 --- a/astroNN/models/base_master_nn.py +++ b/astroNN/models/base_master_nn.py @@ -11,16 +11,14 @@ import numpy as np import pylab as plt import keras -import tensorflow as tf -import keras as tfk -from tensorflow.python.keras.utils.layer_utils import count_params +from keras.utils.summary_utils import count_params import astroNN from astroNN.config import _astroNN_MODEL_NAME from astroNN.config import cpu_gpu_check from astroNN.shared.nn_tools import folder_runnum -epsilon, plot_model = tfk.backend.epsilon, tfk.utils.plot_model +epsilon, plot_model = keras.backend.epsilon, keras.utils.plot_model class NeuralNetMaster(ABC): @@ -67,7 +65,7 @@ def __init__(self): self._python_info = sys.version self._astronn_ver = 
astroNN.__version__ self._keras_ver = keras.__version__ - self._tf_ver = tf.__version__ + self._tf_ver = keras.__version__ self.currentdir = os.getcwd() self.folder_name = None self.fullfilepath = None @@ -814,15 +812,6 @@ def get_layer(self, *args, **kwargs): """ return self.keras_model.get_layer(*args, **kwargs) - def flush(self): - """ - | Experimental, I don't think it works - | Flush GPU memory from tensorflow - - :History: 2018-Jun-19 - Written - Henry Leung (University of Toronto) - """ - tfk.backend.clear_session() - def transfer_weights(self, model, exclusion_output=False): """ Transfer weight of a model to current model if possible diff --git a/astroNN/models/base_vae.py b/astroNN/models/base_vae.py index 82aa15e8..b7b40b56 100644 --- a/astroNN/models/base_vae.py +++ b/astroNN/models/base_vae.py @@ -5,7 +5,7 @@ import numpy as np from tqdm import tqdm -import keras as tfk +import keras from astroNN.config import MULTIPROCESS_FLAG from astroNN.config import _astroNN_MODEL_NAME from astroNN.datasets import H5Loader @@ -20,15 +20,13 @@ from astroNN.nn.utilities import Normalizer from astroNN.nn.utilities.generator import GeneratorMaster from astroNN.shared.dict_tools import dict_np_to_dict_list, list_to_dict -from astroNN.shared.warnings import deprecated, deprecated_copy_signature +from astroNN.shared.warnings import deprecated_copy_signature from sklearn.model_selection import train_test_split -import tensorflow as tf -from tensorflow.python.keras.engine import data_adapter -from tensorflow.python.util import nest +from keras.trainers.data_adapters import data_adapter_utils -regularizers = tfk.regularizers -ReduceLROnPlateau = tfk.callbacks.ReduceLROnPlateau -Adam = tfk.optimizers.Adam +regularizers = keras.regularizers +ReduceLROnPlateau = keras.callbacks.ReduceLROnPlateau +Adam = keras.optimizers.Adam class CVAEDataGenerator(GeneratorMaster): @@ -216,7 +214,7 @@ def compile( sample_weight_mode=None, ): self.keras_encoder, self.keras_decoder = 
self.model() - self.keras_model = tfk.Model( + self.keras_model = keras.Model( inputs=[self.keras_encoder.inputs], outputs=[self.keras_decoder(self.keras_encoder.outputs[2])], ) @@ -246,11 +244,11 @@ def compile( loss_weights=loss_weights, sample_weight_mode=sample_weight_mode, ) - self.keras_model.total_loss_tracker = tfk.metrics.Mean(name="loss") - self.keras_model.reconstruction_loss_tracker = tfk.metrics.Mean( + self.keras_model.total_loss_tracker = keras.metrics.Mean(name="loss") + self.keras_model.reconstruction_loss_tracker = keras.metrics.Mean( name="reconstruction_loss" ) - self.keras_model.kl_loss_tracker = tfk.metrics.Mean(name="kl_loss") + self.keras_model.kl_loss_tracker = keras.metrics.Mean(name="kl_loss") # inject custom training step if needed try: @@ -295,8 +293,7 @@ def custom_train_step(self, data): :param data: :return: """ - data = data_adapter.expand_1d(data) - x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) + x, y, sample_weight = data_adapter_utils.unpack_x_y_sample_weight(data) # TODO: properly fix this y = y["output"] @@ -334,8 +331,7 @@ def custom_train_step(self, data): return return_metrics def custom_test_step(self, data): - data = data_adapter.expand_1d(data) - x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) + x, y, sample_weight = data_adapter_utils.unpack_x_y_sample_weight(data) y = y["output"] z_mean, z_log_var, z = self.keras_encoder(x, training=False) diff --git a/astroNN/nn/layers.py b/astroNN/nn/layers.py index 3cbd9277..e36a8054 100644 --- a/astroNN/nn/layers.py +++ b/astroNN/nn/layers.py @@ -1,12 +1,11 @@ import math import keras - +from keras.layers.input_spec import InputSpec epsilon = keras.backend.epsilon initializers = keras.initializers activations = keras.activations -Layer, Wrapper, InputSpec = keras.layers.Layer, keras.layers.Wrapper, keras.layers.InputSpec - +Layer, Wrapper = keras.layers.Layer, keras.layers.Wrapper class KLDivergenceLayer(Layer): """ diff --git 
a/astroNN/nn/utilities/generator.py b/astroNN/nn/utilities/generator.py index 0cf44276..0b017159 100644 --- a/astroNN/nn/utilities/generator.py +++ b/astroNN/nn/utilities/generator.py @@ -1,11 +1,9 @@ import numpy as np -import keras as tfk +import keras +from keras.trainers.data_adapters.py_dataset_adapter import PyDataset -Sequence = tfk.utils.Sequence - - -class GeneratorMaster(Sequence): +class GeneratorMaster(PyDataset): """ | Top-level class of astroNN data pipeline to generate data for NNs. | It is implemented based on Tensorflow data ``Sequence`` class. diff --git a/astroNN/shared/nn_tools.py b/astroNN/shared/nn_tools.py index 1eda0722..d9a6771f 100644 --- a/astroNN/shared/nn_tools.py +++ b/astroNN/shared/nn_tools.py @@ -4,6 +4,9 @@ import datetime import os import keras +import inspect +import warnings +from astroNN.config import _KERAS_BACKEND # TODO: removed gpu_memory_manage() and gpu_availability() as they are not used in astroNN @@ -19,10 +22,31 @@ def cpu_fallback(flag=True): | 2020-May-31 - Update for tf 2 | 2023-Dec-27 - Update for Keras 3.0 """ + + general_tf_warning_msg = ( + f"Tensorflow has already been initialized, {inspect.currentframe().f_code.co_name}() needs " + f"to be called before any Tensorflow operation, as a result this function will have no effect" + ) + if flag is True: - keras.backend.common.global_state.set_global_attribute("torch_device", "cpu") + if _KERAS_BACKEND == "torch": + keras.backend.common.global_state.set_global_attribute("torch_device", "cpu") + elif _KERAS_BACKEND == "tensorflow": + import tensorflow as tf + try: + tf.config.set_visible_devices([], "GPU") + except RuntimeError: + warnings.warn(general_tf_warning_msg) elif flag is False: - keras.backend.common.global_state.set_global_attribute("torch_device", "cuda") + if _KERAS_BACKEND == "torch": + keras.backend.common.global_state.set_global_attribute("torch_device", "cuda") + elif _KERAS_BACKEND == "tensorflow": + import tensorflow as tf + try: + gpu_phy_devices 
= tf.config.list_physical_devices("GPU") + tf.config.set_visible_devices(gpu_phy_devices, "GPU") + except RuntimeError: + warnings.warn(general_tf_warning_msg) else: raise ValueError("Unknown flag, can only be True of False!") diff --git a/setup.py b/setup.py index 9438fcf7..1096c319 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,6 @@ ) as f: long_description = f.read() -torch_min_version = "2.1.0" python_min_version = "3.9" setup( @@ -34,7 +33,6 @@ "scikit-learn", "tqdm", "packaging", - f"torch>={torch_min_version}", ], url="https://github.com/henrysky/astroNN", project_urls={ diff --git a/tests/test_apogee_model.py b/tests/test_apogee_model.py index b9cb3538..ab2dd88d 100644 --- a/tests/test_apogee_model.py +++ b/tests/test_apogee_model.py @@ -13,8 +13,6 @@ from astroNN.shared.downloader_tools import TqdmUpTo import keras -mnist = keras.datasets.mnist -utils = keras.utils _URL_ORIGIN = "https://www.astro.utoronto.ca/~hleung/shared/ci_data/" filename = "apogee_dr14_green.h5" @@ -273,9 +271,9 @@ def test_ApogeeKplerEchelle(self): - training, testing """ # Data preparation, keep the data size large (>800 data points to prevent issues) - (x_train, y_train), (x_test, y_test) = mnist.load_data() - y_train = utils.to_categorical(y_train, 10) - y_test = utils.to_categorical(y_test, 10) + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + y_train = keras.utils.to_categorical(y_train, 10) + y_test = keras.utils.to_categorical(y_test, 10) # To convert to desirable type y_train = y_train.astype(np.float32) y_test = y_test.astype(np.float32) diff --git a/tests/test_models.py b/tests/test_models.py index f443af62..c40a61d3 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -62,7 +62,7 @@ def test_mnist(self): mnist_test.save('mnist_test_accuracy') mnist_reloaded_again = load_folder("mnist_test_accuracy") # test with astype boolean deliberately - eval_result_again = mnist_reloaded_again.evaluate(x_test, utils.to_categorical(y_test, 
10).astype(bool)) + eval_result_again = mnist_reloaded_again.evaluate(x_test, keras.utils.to_categorical(y_test, 10).astype(bool)) # assert saving again wont affect the model self.assertAlmostEqual(eval_result_again['loss'], eval_result['loss'], places=3)