# layer.py

import numpy as np
import theano
import theano.tensor as T
from utils import create_shared, random_weights

floatX = theano.config.floatX
device = theano.config.device


class HiddenLayer(object):
    """
    Hidden layer with or without bias.
    Input: tensor of dimension (dim*, input_dim)
    Output: tensor of dimension (dim*, output_dim)

    Attributes set by the constructor:
        use_bias: boolean copy of the `bias` constructor flag.
        weights: value returned by create_shared for an
            (input_dim, output_dim) random matrix.
        bias: value returned by create_shared for an (output_dim,) vector,
            or None when the layer was built with bias=False.
        params: list of the layer parameters ([weights] or [weights, bias]).
    """

    def __init__(self, input_dim, output_dim, bias=True, activation='sigmoid',
                 name='hidden_layer'):
        """
        Build the layer parameters.

        activation is one of None, 'tanh', 'sigmoid', 'softmax' or 'relu';
        any other value raises ValueError (a subclass of Exception).
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Keep the boolean flag separate from the bias parameter itself.
        # Storing both under `self.bias` made the flag unreadable: the
        # parameter object overwrote it, so `bias=False` had no effect.
        self.use_bias = bool(bias)
        self.name = name
        if activation is None:
            self.activation = None
        elif activation == 'tanh':
            self.activation = T.tanh
        elif activation == 'sigmoid':
            self.activation = T.nnet.sigmoid
        elif activation == 'softmax':
            self.activation = T.nnet.softmax
        elif activation == 'relu':
            self.activation = T.nnet.relu
        else:
            raise ValueError("Unknown activation function: %s" % activation)

        # Initialize weights
        self.weights = create_shared(
            random_weights((input_dim, output_dim)),
            name + '__weights'
        )

        # Initialize bias only when requested. ReLU layers start with a
        # small constant bias (0.1), every other activation starts at zero.
        if self.use_bias:
            if activation == 'relu':
                initial_bias = np.ones((output_dim,)) * 0.1
            else:
                initial_bias = np.zeros((output_dim,))
            self.bias = create_shared(initial_bias, name + '__bias')
        else:
            self.bias = None

        # Define parameters
        if self.bias is not None:
            self.params = [self.weights, self.bias]
        else:
            self.params = [self.weights]

    def link(self, input):
        """
        The input has to be a tensor with the right
        most dimension equal to input_dim.

        Stores the input, the pre-activation value (linear_output) and the
        final value (output) on the instance, and returns the output.
        """
        self.input = input
        self.linear_output = T.dot(self.input, self.weights)
        # Explicit None check: truth-testing a symbolic variable does not
        # reflect whether a bias was requested.
        if self.bias is not None:
            self.linear_output = self.linear_output + self.bias
        if self.activation is None:
            self.output = self.linear_output
        else:
            self.output = self.activation(self.linear_output)
        return self.output


class EmbeddingLayer(object):
    """
    Embedding layer: maps integer indexes to rows of an embedding matrix.
    Input: tensor of dimension (dim*) with values in range(0, input_dim)
    Output: tensor of dimension (dim*, output_dim)
    """

    def __init__(self, input_dim, output_dim, name='embedding_layer'):
        """
        Create the (input_dim, output_dim) embedding matrix.

        Typically, input_dim is the vocabulary size,
        and output_dim the embedding dimension.
        """
        self.name = name
        self.input_dim, self.output_dim = input_dim, output_dim

        # Randomly initialised lookup table, one row per index.
        table_shape = (input_dim, output_dim)
        initial_table = random_weights(table_shape)
        self.embeddings = create_shared(initial_table, name + '__embeddings')

        # The lookup table is the only parameter of this layer.
        self.params = [self.embeddings]

    def link(self, input):
        """
        Return the embeddings of the given indexes.

        The input is used directly to index the embedding matrix, so the
        result has the shape of the input plus one trailing axis, e.g.
        (batch_size, sentence_length) ->
        (batch_size, sentence_length, output_dim).
        """
        # TODO: check whether theano.sparse_grad should wrap the lookup
        # when running on CPU (see the module-level `device`).
        self.input = input
        looked_up = self.embeddings[input]
        self.output = looked_up
        return looked_up


class DropoutLayer(object):
    """
    Dropout layer. Each value of the input is multiplied by a 0/1 mask,
    so it is set to 0 with probability p. No rescaling is applied here.
    """

    def __init__(self, p=0.5, name='dropout_layer'):
        """
        p is the probability of dropping out a unit and must satisfy
        0 <= p < 1. With p equal to 0 the layer is an identity layer.
        """
        assert 0.0 <= p < 1.0, p
        self.p = p
        # Random stream with a fixed seed, used to draw the masks.
        self.rng = T.shared_randomstreams.RandomStreams(seed=123456)
        self.name = name

    def link(self, input):
        """
        Dropout link: apply a random binary mask to the input.

        The result is stored in self.output and returned.
        """
        # Nothing to drop: pass the input through untouched.
        if not self.p > 0:
            self.output = input
            return self.output

        # Each mask entry is 1 with probability (1 - p), 0 otherwise.
        keep_probability = 1 - self.p
        keep_mask = self.rng.binomial(
            n=1,
            p=keep_probability,
            size=input.shape,
            dtype=floatX
        )
        self.output = input * keep_mask
        return self.output