Add ops.random.alpha_dropout and layers.AlphaDropout #18940
@@ -0,0 +1,69 @@

```python
from keras import backend
from keras.api_export import keras_export
from keras.layers.layer import Layer


@keras_export("keras.layers.AlphaDropout")
class AlphaDropout(Layer):
    """Applies Alpha Dropout to the input.

    Alpha Dropout is a `Dropout` variant that keeps the mean and variance of
    the inputs at their original values, in order to ensure the
    self-normalizing property even after this dropout.
    Alpha Dropout fits well with Scaled Exponential Linear Units (SELU) by
    randomly setting activations to the negative saturation value.

    Args:
        rate: Float between 0 and 1. The multiplicative noise will have
            standard deviation `sqrt(rate / (1 - rate))`.
        noise_shape: 1D integer tensor representing the shape of the
            binary alpha dropout mask that will be multiplied with the input.
            For instance, if your inputs have shape
            `(batch_size, timesteps, features)` and
            you want the alpha dropout mask to be the same for all timesteps,
            you can use `noise_shape=(batch_size, 1, features)`.
        seed: A Python integer to use as random seed.

    Call arguments:
        inputs: Input tensor (of any rank).
        training: Python boolean indicating whether the layer should behave in
            training mode (adding alpha dropout) or in inference mode
            (doing nothing).
    """

    def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
        super().__init__(**kwargs)
        if not 0 <= rate <= 1:
            raise ValueError(
                "Invalid value received for argument "
                "`rate`. Expected a float value between 0 and 1. "
                f"Received: rate={rate}"
            )
        self.rate = rate
        self.seed = seed
        self.noise_shape = noise_shape
        self.seed_generator = backend.random.SeedGenerator(seed)
        self.supports_masking = True
        self.built = True

    def call(self, inputs, training=False):
        if training and self.rate > 0:
            return backend.random.alpha_dropout(
                inputs,
                self.rate,
                noise_shape=self.noise_shape,
                seed=self.seed_generator,
            )
        return inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        base_config = super().get_config()
        config = {
            "rate": self.rate,
            "seed": self.seed,
            "noise_shape": self.noise_shape,
        }
        return {**base_config, **config}
```

Review thread on the `backend.random.alpha_dropout` call:

Reviewer: We can implement the layer in terms of backend ops and
Author: Sure, I've made the changes.
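The layer delegates the actual masking to `backend.random.alpha_dropout`, which is added elsewhere in this PR and is not visible in this diff. As a rough sketch only, the following NumPy reference shows the transformation such an op is generally expected to perform, following the alpha-dropout formulation from the SELU paper (Klambauer et al., 2017); the function name, constants, and exact numerics here are illustrative and not taken from the PR's implementation:

```python
import numpy as np


def alpha_dropout_reference(x, rate, noise_shape=None, seed=None):
    """Illustrative NumPy version of the alpha dropout transform.

    Constants are the SELU fixed points from Klambauer et al. (2017).
    Assumes `noise_shape` (if given) is a concrete tuple that broadcasts
    against `x`.
    """
    alpha = 1.6732632423543772
    scale = 1.0507009873554805
    alpha_p = -alpha * scale  # negative saturation value of SELU

    rng = np.random.default_rng(seed)
    noise_shape = noise_shape or x.shape
    # Binary keep mask, broadcast against the input.
    keep = (rng.uniform(size=noise_shape) >= rate).astype(x.dtype)

    # Affine parameters chosen so that mean and variance are preserved.
    a = ((1 - rate) * (1 + rate * alpha_p**2)) ** -0.5
    b = -a * alpha_p * rate

    # Dropped units are set to the negative saturation value, then the
    # whole tensor is rescaled and shifted.
    return a * (x * keep + alpha_p * (1 - keep)) + b
```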
@@ -0,0 +1,51 @@

```python
import numpy as np
import pytest

from keras import layers
from keras import testing


class AlphaDropoutTest(testing.TestCase):
    @pytest.mark.requires_trainable_backend
    def test_alpha_dropout_basics(self):
        self.run_layer_test(
            layers.AlphaDropout,
            init_kwargs={
                "rate": 0.2,
            },
            input_shape=(2, 3),
            expected_output_shape=(2, 3),
            expected_num_trainable_weights=0,
            expected_num_non_trainable_weights=0,
            expected_num_seed_generators=1,
            expected_num_losses=0,
            supports_masking=True,
        )

    def test_alpha_dropout_partial_noise_shape_dynamic(self):
        inputs = np.ones((20, 5, 10))
        layer = layers.AlphaDropout(0.5, noise_shape=(None, 1, None))
        outputs = layer(inputs, training=True)
        self.assertAllClose(outputs[:, 0, :], outputs[:, 1, :])

    def test_alpha_dropout_partial_noise_shape_static(self):
        inputs = np.ones((20, 5, 10))
        layer = layers.AlphaDropout(0.5, noise_shape=(20, 1, 10))
        outputs = layer(inputs, training=True)
        self.assertAllClose(outputs[:, 0, :], outputs[:, 1, :])

    def test_alpha_dropout_negative_rate(self):
        with self.assertRaisesRegex(
            ValueError,
            "Invalid value received for argument `rate`. "
            "Expected a float value between 0 and 1.",
        ):
            _ = layers.AlphaDropout(rate=-0.5)

    def test_alpha_dropout_rate_greater_than_one(self):
        with self.assertRaisesRegex(
            ValueError,
            "Invalid value received for argument `rate`. "
            "Expected a float value between 0 and 1.",
        ):
            _ = layers.AlphaDropout(rate=1.5)
```
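Not part of the diff: a minimal usage sketch showing how the new layer would typically be paired with SELU activations and `lecun_normal` initialization, which is the combination the docstring's self-normalization claim relies on. Layer sizes and hyperparameters here are illustrative only:

```python
import numpy as np
from keras import layers, models

# Alpha dropout is meant to be used with SELU activations and
# `lecun_normal` initialization so the self-normalizing property holds.
model = models.Sequential(
    [
        layers.Input(shape=(20,)),
        layers.Dense(64, activation="selu", kernel_initializer="lecun_normal"),
        layers.AlphaDropout(0.2),
        layers.Dense(1),
    ]
)

x = np.random.rand(8, 20).astype("float32")
y_train = model(x, training=True)   # mask applied; mean/variance roughly preserved
y_infer = model(x, training=False)  # identity at inference time
```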
A separate review thread:

Reviewer: Any specific reason for having more than 6-7 decimal points?
Author: In practice it will be cast to float32 so some precision will be lost. The number above is just taken from the paper.
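The exact constant under discussion is not visible in this diff; purely for illustration of the float32 point, the snippet below uses the SELU alpha from the paper as an example double-precision literal:

```python
import numpy as np

# Illustration only: the constant discussed above is not shown in this diff.
alpha_f64 = 1.6732632423543772848170429916717  # long literal, as written in the paper
alpha_f32 = np.float32(alpha_f64)

print(alpha_f64)  # 1.6732632423543772 (float64 already rounds the literal)
print(alpha_f32)  # 1.6732632 (float32 keeps only ~7 significant digits)
```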