add stateful processing of recurrent layers
added initialisation of hidden states to layers; fixes #230
renamed GRU parameters to be consistent with all other layers
Sebastian Böck committed Dec 8, 2016
1 parent 421fa8f commit eb0310e
Showing 3 changed files with 190 additions and 50 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
@@ -17,11 +17,13 @@ Bug fixes:
API relevant changes:

* Reorder `GRUCell` parameters, to be consistent with all other layers (#235)
* Rename `GRULayer` parameters, to be consistent with all other layers (#235)

Other changes:

* `num_threads` is passed to `ParallelProcessor` in single mode (#217)
* Use `install_requires` in `setup.py` to specify dependencies (#226)
* Allow initialisation of previous/hidden states in RNNs (#235)


Version 0.14.1 (release date: 2016-08-01)
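To illustrate the `GRULayer` rename listed above, a short sketch (here `reset_gate`, `update_gate`, `cell` and `h0` stand for previously constructed gates, cell and initial state, not shown in this commit):

# before: GRULayer(reset_gate, update_gate, cell, hid_init=h0)
layer = GRULayer(reset_gate, update_gate, cell, init=h0)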
169 changes: 128 additions & 41 deletions madmom/ml/nn/layers.py
@@ -98,43 +98,70 @@ class RecurrentLayer(FeedForwardLayer):
Recurrent weights.
activation_fn : numpy ufunc
Activation function.
init : numpy array, shape (num_hiddens,), optional
Initial state of hidden units.
"""

def __init__(self, weights, bias, recurrent_weights, activation_fn):
def __init__(self, weights, bias, recurrent_weights, activation_fn,
init=None):
super(RecurrentLayer, self).__init__(weights, bias, activation_fn)
self.recurrent_weights = recurrent_weights
if init is None:
init = np.zeros(self.bias.size, dtype=NN_DTYPE)
self.init = init
self._prev = init

def activate(self, data):
def activate(self, data, reset=True):
"""
Activate the layer.
Parameters
----------
data : numpy array, shape (num_frames, num_inputs)
Activate with this data.
reset : bool, optional
Reset the state of the layer before activating it.
Returns
-------
numpy array, shape (num_frames, num_hiddens)
Activations for this data.
"""
# if we don't have recurrent weights, we don't have to loop
if self.recurrent_weights is None:
return super(RecurrentLayer, self).activate(data)
# reset state
if reset:
self.reset()
# weight input and add bias
out = np.dot(data, self.weights) + self.bias
# loop through all time steps
for i in range(len(data)):
# add weighted previous step
if i >= 1:
out[i] += np.dot(out[i - 1], self.recurrent_weights)
out[i] += np.dot(self._prev, self.recurrent_weights)
# apply activation function
self.activation_fn(out[i], out=out[i])
# set reference to current output
self._prev = out[i]
# return
return out

def reset(self, init=None):
"""
Reset the layer to its initial state.
Parameters
----------
init : numpy array, shape (num_hiddens,), optional
Reset the hidden units to this initial state.
"""
# TODO: old models do not have the init attribute, thus create it
# remove this initialisation code after updating the models
if not hasattr(self, 'init'):
self.init = np.zeros(self.bias.size, dtype=NN_DTYPE)
# reset previous time step to initial value
self._prev = self.init if init is None else init
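As an illustration of the new stateful behaviour, a minimal sketch using toy random weights (real weights come from trained models; the dimensions, data and the `np.tanh` activation are made up for this example):

import numpy as np
from madmom.ml.nn.layers import RecurrentLayer

# toy dimensions and random weights, just for illustration
num_inputs, num_hiddens = 4, 3
rng = np.random.RandomState(0)
layer = RecurrentLayer(rng.randn(num_inputs, num_hiddens).astype(np.float32),
                       np.zeros(num_hiddens, dtype=np.float32),
                       rng.randn(num_hiddens, num_hiddens).astype(np.float32),
                       np.tanh)
data = rng.randn(10, num_inputs).astype(np.float32)
# processing the sequence at once ...
out_full = layer.activate(data)
# ... matches processing it in chunks, as long as the state is kept in between
out_a = layer.activate(data[:5])               # reset=True (default): start from `init`
out_b = layer.activate(data[5:], reset=False)  # continue from the previous state
assert np.allclose(out_full, np.vstack([out_a, out_b]))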


class BidirectionalLayer(Layer):
"""
@@ -267,7 +294,7 @@ def __init__(self, weights, bias, recurrent_weights, activation_fn=tanh):
activation_fn=activation_fn)


class LSTMLayer(Layer):
class LSTMLayer(RecurrentLayer):
"""
Recurrent network layer with Long Short-Term Memory units.
@@ -283,66 +310,103 @@ class LSTMLayer(Layer):
Output gate.
activation_fn : numpy ufunc, optional
Activation function.
init : numpy array, shape (num_hiddens, ), optional
Initial state of the layer.
cell_init : numpy array, shape (num_hiddens, ), optional
Initial state of the cell.
"""

def __init__(self, input_gate, forget_gate, cell, output_gate,
activation_fn=tanh):
activation_fn=tanh, init=None, cell_init=None):
self.input_gate = input_gate
self.forget_gate = forget_gate
self.cell = cell
self.output_gate = output_gate
self.activation_fn = activation_fn

def activate(self, data):
# keep the state of the layer and cell
if init is None:
init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
self.init = init
self._prev = self.init
if cell_init is None:
cell_init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
self.cell_init = cell_init
self._state = self.cell_init

def activate(self, data, reset=True):
"""
Activate the LSTM layer.
Parameters
----------
data : numpy array, shape (num_frames, num_inputs)
Activate with this data.
reset : bool, optional
Reset the state of the layer before activating it.
Returns
-------
numpy array, shape (num_frames, num_hiddens)
Activations for this data.
"""
# init / reset previous time step
if reset:
self.reset()
# init arrays
size = len(data)
# output matrix for the whole sequence
out = np.zeros((size, self.cell.bias.size), dtype=NN_DTYPE)
# output (of the previous time step)
out_ = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
# state (of the previous time step)
state_ = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
# process the input data
for i in range(size):
# cache input data
data_ = data[i]
# input gate:
# operate on current data, previous state and previous output
ig = self.input_gate.activate(data_, out_, state_)
# operate on current data, previous output and state
ig = self.input_gate.activate(data_, self._prev, self._state)
# forget gate:
# operate on current data, previous state and previous output
fg = self.forget_gate.activate(data_, out_, state_)
# operate on current data, previous output and state
fg = self.forget_gate.activate(data_, self._prev, self._state)
# cell:
# operate on current data and previous output
cell = self.cell.activate(data_, out_)
cell = self.cell.activate(data_, self._prev)
# internal state:
# weight the cell with the input gate
# and add the previous state weighted by the forget gate
state_ = cell * ig + state_ * fg
self._state = cell * ig + self._state * fg
# output gate:
# operate on current data, current state and previous output
og = self.output_gate.activate(data_, out_, state_)
# operate on current data, previous output and current state
og = self.output_gate.activate(data_, self._prev, self._state)
# output:
# apply activation function to state and weight by output gate
out_ = self.activation_fn(state_) * og
out[i] = out_
out[i] = self.activation_fn(self._state) * og
# set reference to current output
self._prev = out[i]
return out

def reset(self, init=None, cell_init=None):
"""
Reset the layer to its initial state.
Parameters
----------
init : numpy array, shape (num_hiddens,), optional
Reset the hidden units to this initial state.
cell_init : numpy array, shape (num_hiddens,), optional
Reset the cells to this initial state.
"""
# TODO: old models do not have the init attribute, thus create it
# remove this initialisation code after updating the models
if not hasattr(self, 'init'):
self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
if not hasattr(self, 'cell_init'):
self.cell_init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
# reset previous time step and state to initial value
self._prev = self.init if init is None else init
self._state = self.cell_init if cell_init is None else cell_init
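A hedged usage sketch of the new LSTM state handling; `lstm_layer` stands for an existing `LSTMLayer` instance (e.g. from a pre-trained model), and `chunk_1`, `chunk_2`, `chunk_3`, `h0` and `c0` are hypothetical data chunks and initial state vectors:

# stream processing: carry hidden and cell state across chunks
out_1 = lstm_layer.activate(chunk_1)               # reset=True (default): start from `init`/`cell_init`
out_2 = lstm_layer.activate(chunk_2, reset=False)  # continue from the previous state
# start a new sequence from custom initial states; reset() only sets the
# current state, so pass reset=False to keep it when activating
lstm_layer.reset(init=h0, cell_init=c0)
out_3 = lstm_layer.activate(chunk_3, reset=False)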


class GRUCell(object):
"""
@@ -411,7 +475,7 @@ def activate(self, data, prev, reset_gate):
return self.activation_fn(out)


class GRULayer(Layer):
class GRULayer(RecurrentLayer):
"""
Recurrent network layer with Gated Recurrent Units (GRU) as proposed in
[1]_.
@@ -424,7 +488,7 @@ class GRULayer(Layer):
Update gate.
cell : :class:`GRUCell`
GRU cell.
hid_init : numpy array, shape (num_hiddens,), optional
init : numpy array, shape (num_hiddens,), optional
Initial state of hidden units.
References
@@ -443,54 +507,77 @@ class GRULayer(Layer):
"""

def __init__(self, reset_gate, update_gate, cell, hid_init=None):
def __init__(self, reset_gate, update_gate, cell, init=None):
# init the gates
self.reset_gate = reset_gate
self.update_gate = update_gate
self.cell = cell
if hid_init is None:
hid_init = np.zeros(cell.bias.size, dtype=NN_DTYPE)
self.hid_init = hid_init
# keep the state of the layer
if init is None:
init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
self.init = init
self._prev = init

def activate(self, data):
def activate(self, data, reset=True):
"""
Activate the GRU layer.
Parameters
----------
data : numpy array, shape (num_frames, num_inputs)
Activate with this data.
reset : bool, optional
Reset the state of the layer before activating it.
Returns
-------
numpy array, shape (num_frames, num_hiddens)
Activations for this data.
"""
# init / reset previous time step
if reset:
self.reset()
# init arrays
size = len(data)
# output matrix for the whole sequence
out = np.zeros((size, self.update_gate.bias.size), dtype=NN_DTYPE)
# output (of the previous time step)
out_ = self.hid_init
out = np.zeros((size, self.cell.bias.size), dtype=NN_DTYPE)
# process the input data
for i in range(size):
# cache input data
data_ = data[i]
# reset gate:
# operate on current data and previous output
rg = self.reset_gate.activate(data_, out_)
rg = self.reset_gate.activate(data_, self._prev)
# update gate:
# operate on current data and previous output
ug = self.update_gate.activate(data_, out_)
ug = self.update_gate.activate(data_, self._prev)
# cell (implemented as in [1]):
# operate on current data, previous output and reset gate
cell = self.cell.activate(data_, out_, rg)
# output (activation)
out_ = ug * cell + (1 - ug) * out_
out[i] = out_
cell = self.cell.activate(data_, self._prev, rg)
# output:
out[i] = ug * cell + (1 - ug) * self._prev
# set reference to current output
self._prev = out[i]
return out

def reset(self, init=None):
"""
Reset the layer to its initial state.
Parameters
----------
init : scalar or numpy array, shape (num_hiddens,), optional
Reset the hidden units to this initial state.
"""
# TODO: old models do not have the init attribute, thus create it
# remove this initialisation code after updating the models
if not hasattr(self, 'init'):
self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
# reset previous time step to initial value
self._prev = self.init if init is None else init
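The same pattern applies to GRU layers; a hedged sketch assuming `gru_layer` is an existing `GRULayer` instance and `frames` / `h0` are hypothetical input data and an initial hidden state:

gru_layer.reset(init=h0)                       # start from a custom hidden state
out = gru_layer.activate(frames, reset=False)  # reset=False keeps that state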


def _kernel_margins(kernel_shape, margin_shift):
"""
