test both tensorflow and torch

henrysky · Jan 3, 2024 · 58ac478 · 58ac478
1 parent 7c5e612
commit 58ac478
Show file tree

Hide file tree

Showing 11 changed files with 80 additions and 119 deletions.
diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml
@@ -5,19 +5,19 @@ on: [push, pull_request]
 jobs:
   test:
     runs-on: ubuntu-latest
-    env:
-      SDSS_LOCAL_SAS_MIRROR: ./ci_data/
-      GAIA_TOOLS_DATA: ./ci_data/
-      LASMOT_DR5_DATA: ./ci_data/
-      KERAS_BACKEND: torch
     strategy:
       fail-fast: false
       matrix:
         include:
-          - { TORCH_VER: 2.1.0, PAPER_MODELS: true, ALLOW_FAILURE: false }
-          - { TORCH_VER: 2.1.0, PAPER_MODELS: false, ALLOW_FAILURE: false }
-          - { TORCH_VER: 2.0.0, PAPER_MODELS: true, ALLOW_FAILURE: false }
-          - { TORCH_VER: 2.0.0, PAPER_MODELS: false, ALLOW_FAILURE: false }
+          - { BACKEND: torch, BACKEND_VER: 2.1.0, PAPER_MODELS: true, ALLOW_FAILURE: false }
+          - { BACKEND: torch, BACKEND_VER: 2.1.0, PAPER_MODELS: false, ALLOW_FAILURE: false }
+          - { BACKEND: tensorflow, BACKEND_VER: 2.15.0, PAPER_MODELS: true, ALLOW_FAILURE: false }
+          - { BACKEND: tensorflow, BACKEND_VER: 2.15.0, PAPER_MODELS: false, ALLOW_FAILURE: false }
+    env:
+      SDSS_LOCAL_SAS_MIRROR: ./ci_data/
+      GAIA_TOOLS_DATA: ./ci_data/
+      LASMOT_DR5_DATA: ./ci_data/
+      KERAS_BACKEND: ${{ matrix.BACKEND }}
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python
@@ -26,14 +26,14 @@ jobs:
           python-version: "3.10"
       - name: Install dependencies
         run: |
-          pip install torch~=${{ matrix.TORCH_VER }}
+          pip install ${{ matrix.BACKEND }}~=${{ matrix.BACKEND_VER }}
           pip install keras coveralls pydot graphviz pytest pytest-cov
           pip install .
       - name: Cache test data
         uses: actions/cache@v3
         with:
           path: ${{github.workspace}}/ci_data/
-          key: astronn-${{ runner.os }}-${{ matrix.TORCH_VER }}-${{ matrix.PAPER_MODELS }}-2023april30
+          key: astronn-${{ runner.os }}-${{ matrix.BACKEND_VER }}-${{ matrix.PAPER_MODELS }}-2023april30
       - name: Test astroNN
         continue-on-error: ${{ matrix.ALLOW_FAILURE }}
         if: ${{ matrix.PAPER_MODELS == false}}
@@ -54,11 +54,12 @@ jobs:
         continue-on-error: ${{ matrix.ALLOW_FAILURE }}
         uses: codecov/codecov-action@v3
         env:
-          TORCH_VER: ${{ matrix.TORCH_VER }}
+          BACKEND: ${{ matrix.BACKEND }}
+          BACKEND_VER: ${{ matrix.BACKEND_VER }}
           PAPER_MODELS: ${{ matrix.PAPER_MODELS }}
         with:
           flags: unittests
-          env_vars: TORCH_VER, PAPER_MODELS
+          env_vars: BACKEND, BACKEND_VER, PAPER_MODELS
           name: codecov-astroNN
           fail_ci_if_error: true
           verbose: true
diff --git a/astroNN/config.py b/astroNN/config.py
@@ -8,9 +8,10 @@
 
 astroNN_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".astroNN")
 _astroNN_MODEL_NAME = "model_weights.keras"  # default astroNN model filename
+_KERAS_BACKEND = keras.backend.backend()
 
-if keras.backend.backend() != "torch":
-    raise ImportError(f"astroNN only support PyTorch backend, currently you have '{keras.backend.backend()}' as backend")
+if _KERAS_BACKEND not in ("torch", "tensorflow"):
+    raise ImportError(f"astroNN only support Tensorflow and PyTorch backend, currently you have '{keras.backend.backend()}' as backend")
 
 
 def config_path(flag=None):

diff --git a/astroNN/models/base_bayesian_cnn.py b/astroNN/models/base_bayesian_cnn.py
@@ -25,7 +25,6 @@
 from astroNN.nn.utilities import Normalizer
 from astroNN.nn.utilities.generator import GeneratorMaster
 from astroNN.shared.warnings import deprecated, deprecated_copy_signature
-from astroNN.shared.nn_tools import gpu_availability
 from astroNN.shared.dict_tools import dict_np_to_dict_list, list_to_dict
 
 from astroNN.nn.losses import (
@@ -544,7 +543,6 @@ def fit(
         inputs_err=None,
         labels_err=None,
         sample_weight=None,
-        experimental=False,
     ):
         """
         Train a Bayesian neural network
@@ -617,42 +615,15 @@ def fit(
 
         start_time = time.time()
 
-        if experimental:
-            dataset = (
-                tf.data.Dataset.from_tensor_slices(
-                    (norm_data_training, norm_labels_training, sample_weight_training)
-                )
-                .batch(self.batch_size)
-                .shuffle(5000, reshuffle_each_iteration=True)
-                .prefetch(tf.data.AUTOTUNE)
-            )
-            val_dataset = (
-                tf.data.Dataset.from_tensor_slices(
-                    (norm_data_val, norm_labels_val, sample_weight_val)
-                )
-                .batch(self.batch_size)
-                .prefetch(tf.data.AUTOTUNE)
-            )
-
-            self.history = self.keras_model.fit(
-                dataset,
-                validation_data=val_dataset,
-                epochs=self.max_epochs,
-                verbose=self.verbose,
-                workers=os.cpu_count() // 2,
-                callbacks=self.__callbacks,
-                use_multiprocessing=MULTIPROCESS_FLAG,
-            )
-        else:
-            self.history = self.keras_model.fit(
-                self.training_generator,
-                validation_data=self.validation_generator,
-                epochs=self.max_epochs,
-                verbose=self.verbose,
-                workers=os.cpu_count() // 2,
-                callbacks=self.__callbacks,
-                use_multiprocessing=MULTIPROCESS_FLAG,
-            )
+        self.history = self.keras_model.fit(
+            self.training_generator,
+            validation_data=self.validation_generator,
+            epochs=self.max_epochs,
+            verbose=self.verbose,
+            workers=os.cpu_count() // 2,
+            callbacks=self.__callbacks,
+            use_multiprocessing=MULTIPROCESS_FLAG,
+        )
 
         print(f"Completed Training, {(time.time() - start_time):.{2}f}s in total")
         if self.autosave is True:
@@ -815,15 +786,8 @@ def predict(self, input_data, inputs_err=None, batch_size=None):
         """
         self.has_model_check()
 
-        if gpu_availability() is False and self.mc_num > 25:
-            warnings.warn(
-                f"You are using CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can "
-                f"potentially be very slow! \n "
-                f"A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n"
-                f"This is just a warning, and will not shown if mc_num < 25 on CPU"
-            )
-            if self.mc_num < 2:
-                raise AttributeError("mc_num cannot be smaller than 2")
+        if self.mc_num < 2:
+            raise AttributeError("mc_num cannot be smaller than 2")
 
         # if no error array then just zeros
         if inputs_err is None:
@@ -1026,15 +990,8 @@ def on_epoch_end(self):
 
         self.has_model_check()
 
-        if gpu_availability() is False and self.mc_num > 25:
-            warnings.warn(
-                f"You are using CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can "
-                f"potentially be very slow! \n "
-                f"A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n"
-                f"This is just a warning, and will not shown if mc_num < 25 on CPU"
-            )
-            if self.mc_num < 2:
-                raise AttributeError("mc_num cannot be smaller than 2")
+        if self.mc_num < 2:
+            raise AttributeError("mc_num cannot be smaller than 2")
 
         total_test_num = len(file)  # Number of testing data
 

diff --git a/astroNN/models/base_master_nn.py b/astroNN/models/base_master_nn.py
@@ -11,16 +11,14 @@
 import numpy as np
 import pylab as plt
 import keras
-import tensorflow as tf
-import keras as tfk
-from tensorflow.python.keras.utils.layer_utils import count_params
+from keras.utils.summary_utils import count_params
 
 import astroNN
 from astroNN.config import _astroNN_MODEL_NAME
 from astroNN.config import cpu_gpu_check
 from astroNN.shared.nn_tools import folder_runnum
 
-epsilon, plot_model = tfk.backend.epsilon, tfk.utils.plot_model
+epsilon, plot_model = keras.backend.epsilon, keras.utils.plot_model
 
 
 class NeuralNetMaster(ABC):
@@ -67,7 +65,7 @@ def __init__(self):
         self._python_info = sys.version
         self._astronn_ver = astroNN.__version__
         self._keras_ver = keras.__version__
-        self._tf_ver = tf.__version__
+        self._tf_ver = keras.__version__
         self.currentdir = os.getcwd()
         self.folder_name = None
         self.fullfilepath = None
@@ -814,15 +812,6 @@ def get_layer(self, *args, **kwargs):
         """
         return self.keras_model.get_layer(*args, **kwargs)
 
-    def flush(self):
-        """
-        | Experimental, I don't think it works
-        | Flush GPU memory from tensorflow
-
-        :History: 2018-Jun-19 - Written - Henry Leung (University of Toronto)
-        """
-        tfk.backend.clear_session()
-
     def transfer_weights(self, model, exclusion_output=False):
         """
         Transfer weight of a model to current model if possible

diff --git a/astroNN/models/base_vae.py b/astroNN/models/base_vae.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 from tqdm import tqdm
-import keras as tfk
+import keras
 from astroNN.config import MULTIPROCESS_FLAG
 from astroNN.config import _astroNN_MODEL_NAME
 from astroNN.datasets import H5Loader
@@ -20,15 +20,13 @@
 from astroNN.nn.utilities import Normalizer
 from astroNN.nn.utilities.generator import GeneratorMaster
 from astroNN.shared.dict_tools import dict_np_to_dict_list, list_to_dict
-from astroNN.shared.warnings import deprecated, deprecated_copy_signature
+from astroNN.shared.warnings import deprecated_copy_signature
 from sklearn.model_selection import train_test_split
-import tensorflow as tf
-from tensorflow.python.keras.engine import data_adapter
-from tensorflow.python.util import nest
+from keras.trainers.data_adapters import data_adapter_utils
 
-regularizers = tfk.regularizers
-ReduceLROnPlateau = tfk.callbacks.ReduceLROnPlateau
-Adam = tfk.optimizers.Adam
+regularizers = keras.regularizers
+ReduceLROnPlateau = keras.callbacks.ReduceLROnPlateau
+Adam = keras.optimizers.Adam
 
 
 class CVAEDataGenerator(GeneratorMaster):
@@ -216,7 +214,7 @@ def compile(
         sample_weight_mode=None,
     ):
         self.keras_encoder, self.keras_decoder = self.model()
-        self.keras_model = tfk.Model(
+        self.keras_model = keras.Model(
             inputs=[self.keras_encoder.inputs],
             outputs=[self.keras_decoder(self.keras_encoder.outputs[2])],
         )
@@ -246,11 +244,11 @@ def compile(
             loss_weights=loss_weights,
             sample_weight_mode=sample_weight_mode,
         )
-        self.keras_model.total_loss_tracker = tfk.metrics.Mean(name="loss")
-        self.keras_model.reconstruction_loss_tracker = tfk.metrics.Mean(
+        self.keras_model.total_loss_tracker = keras.metrics.Mean(name="loss")
+        self.keras_model.reconstruction_loss_tracker = keras.metrics.Mean(
             name="reconstruction_loss"
         )
-        self.keras_model.kl_loss_tracker = tfk.metrics.Mean(name="kl_loss")
+        self.keras_model.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
 
         # inject custom training step if needed
         try:
@@ -295,8 +293,7 @@ def custom_train_step(self, data):
         :param data:
         :return:
         """
-        data = data_adapter.expand_1d(data)
-        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
+        x, y, sample_weight = data_adapter_utils.unpack_x_y_sample_weight(data)
         # TODO: properly fix this
         y = y["output"]
 
@@ -334,8 +331,7 @@ def custom_train_step(self, data):
         return return_metrics
 
     def custom_test_step(self, data):
-        data = data_adapter.expand_1d(data)
-        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
+        x, y, sample_weight = data_adapter_utils.unpack_x_y_sample_weight(data)
         y = y["output"]
 
         z_mean, z_log_var, z = self.keras_encoder(x, training=False)

diff --git a/astroNN/nn/layers.py b/astroNN/nn/layers.py
@@ -1,12 +1,11 @@
 import math
 import keras
-
+from keras.layers.input_spec import InputSpec
 
 epsilon = keras.backend.epsilon
 initializers = keras.initializers
 activations = keras.activations
-Layer, Wrapper, InputSpec = keras.layers.Layer, keras.layers.Wrapper, keras.layers.InputSpec
-
+Layer, Wrapper = keras.layers.Layer, keras.layers.Wrapper
 
 class KLDivergenceLayer(Layer):
     """

diff --git a/astroNN/nn/utilities/generator.py b/astroNN/nn/utilities/generator.py
@@ -1,11 +1,9 @@
 import numpy as np
 
-import keras as tfk
+import keras
+from keras.trainers.data_adapters.py_dataset_adapter import PyDataset
 
-Sequence = tfk.utils.Sequence
-
-
-class GeneratorMaster(Sequence):
+class GeneratorMaster(PyDataset):
     """
     | Top-level class of astroNN data pipeline to generate data for NNs.
     | It is implemented based on Tensorflow data ``Sequence`` class.

diff --git a/astroNN/shared/nn_tools.py b/astroNN/shared/nn_tools.py
@@ -4,6 +4,9 @@
 import datetime
 import os
 import keras
+import inspect
+import warnings
+from astroNN.config import _KERAS_BACKEND
 
 # TODO: removed gpu_memory_manage() and gpu_availability() as they are not used in astroNN
 
@@ -19,10 +22,31 @@ def cpu_fallback(flag=True):
         | 2020-May-31 - Update for tf 2
         | 2023-Dec-27 - Update for Keras 3.0
     """
+
+    general_tf_warning_msg = (
+        f"Tensorflow has already been initialized, {inspect.currentframe().f_code.co_name}() needs "
+        f"to be called before any Tensorflow operation, as a result this function will have no effect"
+    )
+
     if flag is True:
-        keras.backend.common.global_state.set_global_attribute("torch_device", "cpu")
+        if _KERAS_BACKEND == "torch":
+            keras.backend.common.global_state.set_global_attribute("torch_device", "cpu")
+        elif _KERAS_BACKEND == "tensorflow":
+            import tensorflow as tf
+            try:
+                tf.config.set_visible_devices([], "GPU")
+            except RuntimeError:
+                warnings.warn(general_tf_warning_msg)
     elif flag is False:
-        keras.backend.common.global_state.set_global_attribute("torch_device", "cuda")
+        if _KERAS_BACKEND == "torch":
+            keras.backend.common.global_state.set_global_attribute("torch_device", "cuda")
+        elif _KERAS_BACKEND == "tensorflow":
+            import tensorflow as tf
+            try:
+                gpu_phy_devices = tf.config.list_physical_devices("GPU")
+                tf.config.set_visible_devices(gpu_phy_devices, "GPU")
+            except RuntimeError:
+                warnings.warn(general_tf_warning_msg)
     else:
         raise ValueError("Unknown flag, can only be True of False!")
 

diff --git a/setup.py b/setup.py
@@ -7,7 +7,6 @@
 ) as f:
     long_description = f.read()
 
-torch_min_version = "2.1.0"
 python_min_version = "3.9"
 
 setup(
@@ -34,7 +33,6 @@
         "scikit-learn",
         "tqdm",
         "packaging",
-        f"torch>={torch_min_version}",
     ],
     url="https://github.com/henrysky/astroNN",
     project_urls={