Skip to content

Commit

Permalink
test both tensorflow and torch
Browse files Browse the repository at this point in the history
  • Loading branch information
henrysky committed Jan 3, 2024
1 parent 7c5e612 commit 58ac478
Show file tree
Hide file tree
Showing 11 changed files with 80 additions and 119 deletions.
27 changes: 14 additions & 13 deletions .github/workflows/ci_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@ on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
env:
SDSS_LOCAL_SAS_MIRROR: ./ci_data/
GAIA_TOOLS_DATA: ./ci_data/
LASMOT_DR5_DATA: ./ci_data/
KERAS_BACKEND: torch
strategy:
fail-fast: false
matrix:
include:
- { TORCH_VER: 2.1.0, PAPER_MODELS: true, ALLOW_FAILURE: false }
- { TORCH_VER: 2.1.0, PAPER_MODELS: false, ALLOW_FAILURE: false }
- { TORCH_VER: 2.0.0, PAPER_MODELS: true, ALLOW_FAILURE: false }
- { TORCH_VER: 2.0.0, PAPER_MODELS: false, ALLOW_FAILURE: false }
# Every entry must define BACKEND_VER: the install step, the cache key and the
# codecov env all read `matrix.BACKEND_VER`, which expands to an empty string
# when an entry only defines the old TORCH_VER key.
- { BACKEND: torch, BACKEND_VER: 2.1.0, PAPER_MODELS: true, ALLOW_FAILURE: false }
- { BACKEND: torch, BACKEND_VER: 2.1.0, PAPER_MODELS: false, ALLOW_FAILURE: false }
- { BACKEND: tensorflow, BACKEND_VER: 2.15.0, PAPER_MODELS: true, ALLOW_FAILURE: false }
- { BACKEND: tensorflow, BACKEND_VER: 2.15.0, PAPER_MODELS: false, ALLOW_FAILURE: false }
env:
SDSS_LOCAL_SAS_MIRROR: ./ci_data/
GAIA_TOOLS_DATA: ./ci_data/
LASMOT_DR5_DATA: ./ci_data/
KERAS_BACKEND: ${{ matrix.BACKEND }}
steps:
- uses: actions/checkout@v4
- name: Set up Python
Expand All @@ -26,14 +26,14 @@ jobs:
python-version: "3.10"
- name: Install dependencies
run: |
pip install torch~=${{ matrix.TORCH_VER }}
pip install ${{ matrix.BACKEND }}~=${{ matrix.BACKEND_VER }}
pip install keras coveralls pydot graphviz pytest pytest-cov
pip install .
- name: Cache test data
uses: actions/cache@v3
with:
path: ${{github.workspace}}/ci_data/
key: astronn-${{ runner.os }}-${{ matrix.TORCH_VER }}-${{ matrix.PAPER_MODELS }}-2023april30
key: astronn-${{ runner.os }}-${{ matrix.BACKEND_VER }}-${{ matrix.PAPER_MODELS }}-2023april30
- name: Test astroNN
continue-on-error: ${{ matrix.ALLOW_FAILURE }}
if: ${{ matrix.PAPER_MODELS == false}}
Expand All @@ -54,11 +54,12 @@ jobs:
continue-on-error: ${{ matrix.ALLOW_FAILURE }}
uses: codecov/codecov-action@v3
env:
TORCH_VER: ${{ matrix.TORCH_VER }}
BACKEND: ${{ matrix.BACKEND }}
BACKEND_VER: ${{ matrix.BACKEND_VER }}
PAPER_MODELS: ${{ matrix.PAPER_MODELS }}
with:
flags: unittests
env_vars: TORCH_VER, PAPER_MODELS
env_vars: BACKEND, BACKEND_VER, PAPER_MODELS
name: codecov-astroNN
fail_ci_if_error: true
verbose: true
5 changes: 3 additions & 2 deletions astroNN/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@

astroNN_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".astroNN")
_astroNN_MODEL_NAME = "model_weights.keras" # default astroNN model filename
_KERAS_BACKEND = keras.backend.backend()

if keras.backend.backend() != "torch":
raise ImportError(f"astroNN only support PyTorch backend, currently you have '{keras.backend.backend()}' as backend")
# Refuse to import astroNN on an unsupported Keras backend.
# A membership test is required here: `x != "torch" or x != "tensorflow"` is
# true for every x (no value equals both), so it would raise unconditionally.
if _KERAS_BACKEND not in ("torch", "tensorflow"):
    raise ImportError(
        f"astroNN only support Tensorflow and PyTorch backend, "
        f"currently you have '{keras.backend.backend()}' as backend"
    )


def config_path(flag=None):
Expand Down
69 changes: 13 additions & 56 deletions astroNN/models/base_bayesian_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from astroNN.nn.utilities import Normalizer
from astroNN.nn.utilities.generator import GeneratorMaster
from astroNN.shared.warnings import deprecated, deprecated_copy_signature
from astroNN.shared.nn_tools import gpu_availability
from astroNN.shared.dict_tools import dict_np_to_dict_list, list_to_dict

from astroNN.nn.losses import (
Expand Down Expand Up @@ -544,7 +543,6 @@ def fit(
inputs_err=None,
labels_err=None,
sample_weight=None,
experimental=False,
):
"""
Train a Bayesian neural network
Expand Down Expand Up @@ -617,42 +615,15 @@ def fit(

start_time = time.time()

if experimental:
dataset = (
tf.data.Dataset.from_tensor_slices(
(norm_data_training, norm_labels_training, sample_weight_training)
)
.batch(self.batch_size)
.shuffle(5000, reshuffle_each_iteration=True)
.prefetch(tf.data.AUTOTUNE)
)
val_dataset = (
tf.data.Dataset.from_tensor_slices(
(norm_data_val, norm_labels_val, sample_weight_val)
)
.batch(self.batch_size)
.prefetch(tf.data.AUTOTUNE)
)

self.history = self.keras_model.fit(
dataset,
validation_data=val_dataset,
epochs=self.max_epochs,
verbose=self.verbose,
workers=os.cpu_count() // 2,
callbacks=self.__callbacks,
use_multiprocessing=MULTIPROCESS_FLAG,
)
else:
self.history = self.keras_model.fit(
self.training_generator,
validation_data=self.validation_generator,
epochs=self.max_epochs,
verbose=self.verbose,
workers=os.cpu_count() // 2,
callbacks=self.__callbacks,
use_multiprocessing=MULTIPROCESS_FLAG,
)
self.history = self.keras_model.fit(
self.training_generator,
validation_data=self.validation_generator,
epochs=self.max_epochs,
verbose=self.verbose,
workers=os.cpu_count() // 2,
callbacks=self.__callbacks,
use_multiprocessing=MULTIPROCESS_FLAG,
)

print(f"Completed Training, {(time.time() - start_time):.{2}f}s in total")
if self.autosave is True:
Expand Down Expand Up @@ -815,15 +786,8 @@ def predict(self, input_data, inputs_err=None, batch_size=None):
"""
self.has_model_check()

if gpu_availability() is False and self.mc_num > 25:
warnings.warn(
f"You are using CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can "
f"potentially be very slow! \n "
f"A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n"
f"This is just a warning, and will not shown if mc_num < 25 on CPU"
)
if self.mc_num < 2:
raise AttributeError("mc_num cannot be smaller than 2")
if self.mc_num < 2:
raise AttributeError("mc_num cannot be smaller than 2")

# if no error array then just zeros
if inputs_err is None:
Expand Down Expand Up @@ -1026,15 +990,8 @@ def on_epoch_end(self):

self.has_model_check()

if gpu_availability() is False and self.mc_num > 25:
warnings.warn(
f"You are using CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can "
f"potentially be very slow! \n "
f"A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n"
f"This is just a warning, and will not shown if mc_num < 25 on CPU"
)
if self.mc_num < 2:
raise AttributeError("mc_num cannot be smaller than 2")
if self.mc_num < 2:
raise AttributeError("mc_num cannot be smaller than 2")

total_test_num = len(file) # Number of testing data

Expand Down
17 changes: 3 additions & 14 deletions astroNN/models/base_master_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,14 @@
import numpy as np
import pylab as plt
import keras
import tensorflow as tf
import keras as tfk
from tensorflow.python.keras.utils.layer_utils import count_params
from keras.utils.summary_utils import count_params

import astroNN
from astroNN.config import _astroNN_MODEL_NAME
from astroNN.config import cpu_gpu_check
from astroNN.shared.nn_tools import folder_runnum

epsilon, plot_model = tfk.backend.epsilon, tfk.utils.plot_model
epsilon, plot_model = keras.backend.epsilon, keras.utils.plot_model


class NeuralNetMaster(ABC):
Expand Down Expand Up @@ -67,7 +65,7 @@ def __init__(self):
self._python_info = sys.version
self._astronn_ver = astroNN.__version__
self._keras_ver = keras.__version__
self._tf_ver = tf.__version__
self._tf_ver = keras.__version__
self.currentdir = os.getcwd()
self.folder_name = None
self.fullfilepath = None
Expand Down Expand Up @@ -814,15 +812,6 @@ def get_layer(self, *args, **kwargs):
"""
return self.keras_model.get_layer(*args, **kwargs)

def flush(self):
"""
| Experimental, I don't think it works
| Flush GPU memory from tensorflow
:History: 2018-Jun-19 - Written - Henry Leung (University of Toronto)
"""
tfk.backend.clear_session()

def transfer_weights(self, model, exclusion_output=False):
"""
Transfer weight of a model to current model if possible
Expand Down
28 changes: 12 additions & 16 deletions astroNN/models/base_vae.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np
from tqdm import tqdm
import keras as tfk
import keras
from astroNN.config import MULTIPROCESS_FLAG
from astroNN.config import _astroNN_MODEL_NAME
from astroNN.datasets import H5Loader
Expand All @@ -20,15 +20,13 @@
from astroNN.nn.utilities import Normalizer
from astroNN.nn.utilities.generator import GeneratorMaster
from astroNN.shared.dict_tools import dict_np_to_dict_list, list_to_dict
from astroNN.shared.warnings import deprecated, deprecated_copy_signature
from astroNN.shared.warnings import deprecated_copy_signature
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.python.keras.engine import data_adapter
from tensorflow.python.util import nest
from keras.trainers.data_adapters import data_adapter_utils

regularizers = tfk.regularizers
ReduceLROnPlateau = tfk.callbacks.ReduceLROnPlateau
Adam = tfk.optimizers.Adam
regularizers = keras.regularizers
ReduceLROnPlateau = keras.callbacks.ReduceLROnPlateau
Adam = keras.optimizers.Adam


class CVAEDataGenerator(GeneratorMaster):
Expand Down Expand Up @@ -216,7 +214,7 @@ def compile(
sample_weight_mode=None,
):
self.keras_encoder, self.keras_decoder = self.model()
self.keras_model = tfk.Model(
self.keras_model = keras.Model(
inputs=[self.keras_encoder.inputs],
outputs=[self.keras_decoder(self.keras_encoder.outputs[2])],
)
Expand Down Expand Up @@ -246,11 +244,11 @@ def compile(
loss_weights=loss_weights,
sample_weight_mode=sample_weight_mode,
)
self.keras_model.total_loss_tracker = tfk.metrics.Mean(name="loss")
self.keras_model.reconstruction_loss_tracker = tfk.metrics.Mean(
self.keras_model.total_loss_tracker = keras.metrics.Mean(name="loss")
self.keras_model.reconstruction_loss_tracker = keras.metrics.Mean(
name="reconstruction_loss"
)
self.keras_model.kl_loss_tracker = tfk.metrics.Mean(name="kl_loss")
self.keras_model.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

# inject custom training step if needed
try:
Expand Down Expand Up @@ -295,8 +293,7 @@ def custom_train_step(self, data):
:param data:
:return:
"""
data = data_adapter.expand_1d(data)
x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
x, y, sample_weight = data_adapter_utils.unpack_x_y_sample_weight(data)
# TODO: properly fix this
y = y["output"]

Expand Down Expand Up @@ -334,8 +331,7 @@ def custom_train_step(self, data):
return return_metrics

def custom_test_step(self, data):
data = data_adapter.expand_1d(data)
x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
x, y, sample_weight = data_adapter_utils.unpack_x_y_sample_weight(data)
y = y["output"]

z_mean, z_log_var, z = self.keras_encoder(x, training=False)
Expand Down
5 changes: 2 additions & 3 deletions astroNN/nn/layers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import math
import keras

from keras.layers.input_spec import InputSpec

epsilon = keras.backend.epsilon
initializers = keras.initializers
activations = keras.activations
Layer, Wrapper, InputSpec = keras.layers.Layer, keras.layers.Wrapper, keras.layers.InputSpec

Layer, Wrapper = keras.layers.Layer, keras.layers.Wrapper

class KLDivergenceLayer(Layer):
"""
Expand Down
8 changes: 3 additions & 5 deletions astroNN/nn/utilities/generator.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import numpy as np

import keras as tfk
import keras
from keras.trainers.data_adapters.py_dataset_adapter import PyDataset

Sequence = tfk.utils.Sequence


class GeneratorMaster(Sequence):
class GeneratorMaster(PyDataset):
"""
| Top-level class of astroNN data pipeline to generate data for NNs.
| It is implemented based on Keras ``PyDataset`` class.
Expand Down
28 changes: 26 additions & 2 deletions astroNN/shared/nn_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import datetime
import os
import keras
import inspect
import warnings
from astroNN.config import _KERAS_BACKEND

# TODO: removed gpu_memory_manage() and gpu_availability() as they are not used in astroNN

Expand All @@ -19,10 +22,31 @@ def cpu_fallback(flag=True):
| 2020-May-31 - Update for tf 2
| 2023-Dec-27 - Update for Keras 3.0
"""

general_tf_warning_msg = (
f"Tensorflow has already been initialized, {inspect.currentframe().f_code.co_name}() needs "
f"to be called before any Tensorflow operation, as a result this function will have no effect"
)

if flag is True:
keras.backend.common.global_state.set_global_attribute("torch_device", "cpu")
if _KERAS_BACKEND == "torch":
keras.backend.common.global_state.set_global_attribute("torch_device", "cpu")
elif _KERAS_BACKEND == "tensorflow":
import tensorflow as tf
try:
tf.config.set_visible_devices([], "GPU")
except RuntimeError:
warnings.warn(general_tf_warning_msg)
elif flag is False:
keras.backend.common.global_state.set_global_attribute("torch_device", "cuda")
if _KERAS_BACKEND == "torch":
keras.backend.common.global_state.set_global_attribute("torch_device", "cuda")
elif _KERAS_BACKEND == "tensorflow":
import tensorflow as tf
try:
gpu_phy_devices = tf.config.list_physical_devices("GPU")
tf.config.set_visible_devices(gpu_phy_devices, "GPU")
except RuntimeError:
warnings.warn(general_tf_warning_msg)
else:
raise ValueError("Unknown flag, can only be True of False!")

Expand Down
2 changes: 0 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
) as f:
long_description = f.read()

torch_min_version = "2.1.0"
python_min_version = "3.9"

setup(
Expand All @@ -34,7 +33,6 @@
"scikit-learn",
"tqdm",
"packaging",
f"torch>={torch_min_version}",
],
url="https://github.com/henrysky/astroNN",
project_urls={
Expand Down
Loading

0 comments on commit 58ac478

Please sign in to comment.