add stateful processing of recurrent layers

added initialisation of hidden states to layers; fixes #230; renamed GRU parameters to be consistent with all other layers
CPJKU · Jan 19, 2017 · 1ce9cf6 · 1ce9cf6
1 parent a2165a6
commit 1ce9cf6
Show file tree

Hide file tree

Showing 4 changed files with 483 additions and 93 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -17,11 +17,13 @@ Bug fixes:
 API relevant changes:
 
 * Reorder `GRUCell` parameters, to be consistent with all other layers (#243)
+* Rename `GRULayer` parameters, to be consistent with all other layers (#243)
 
 Other changes:
 
 * `num_threads` is passed to `ParallelProcessor` in single mode (#217)
 * Use `install_requires` in `setup.py` to specify dependencies (#226)
+* Allow initialisation of previous/hidden states in RNNs (#243)
 
 
 Version 0.14.1 (release date: 2016-08-01)

diff --git a/madmom/ml/nn/__init__.py b/madmom/ml/nn/__init__.py
@@ -71,13 +71,53 @@ class NeuralNetwork(Processor):
 
     """
 
-    def __init__(self, layers):
+    def __init__(self, layers, online=False):
         self.layers = layers
+        self.online = online
+
+    def __setstate__(self, state):
+        # restore instance attributes
+        self.__dict__.update(state)
+        # TODO: old models do not have the online attribute, thus create it
+        #       remove this initialisation code after updating the models
+        #       At least we should update the online models so that we can set
+        #       the default value to False
+        if not hasattr(self, 'online'):
+            self.online = None
 
     def process(self, data):
         """
         Process the given data with the neural network.
 
+        Parameters
+        ----------
+        data : numpy array
+            Activate the network with this data.
+
+        Returns
+        -------
+        numpy array
+            Network predictions for this data.
+
+        Notes
+        -----
+        Depending on online/offline mode the predictions are either reported
+        on a step-by-step basis or for the whole sequence, respectively.
+
+        """
+        if self.online:
+            data = self.process_step(data)
+        else:
+            data = self.process_sequence(data)
+        # ravel the predictions if needed
+        if data.ndim == 2 and data.shape[1] == 1:
+            data = data.ravel()
+        return data
+
+    def process_sequence(self, data):
+        """
+        Process the given data with the neural network.
+
         Parameters
         ----------
         data : numpy array
@@ -96,11 +136,39 @@ def process(self, data):
         for layer in self.layers:
             # activate the layer and feed the output into the next one
             data = layer(data)
-        # ravel the predictions if needed
-        if data.ndim == 2 and data.shape[1] == 1:
-            data = data.ravel()
+        # return the data
         return data
 
+    def process_step(self, data):
+        """
+        Process the given data with the neural network step-by-step.
+
+        Parameters
+        ----------
+        data : numpy array
+            Activate the network with this data.
+
+        Returns
+        -------
+        numpy array
+            Network predictions for this data.
+
+        """
+        # loop over all layers
+        for layer in self.layers:
+            # activate the layer and feed the output into the next one
+            data = layer.activate_step(data)
+        # return the data
+        return data
+
+    def reset(self):
+        """
+        Reset the neural network to its initial state.
+
+        """
+        for layer in self.layers:
+            layer.reset()
+
 
 class NeuralNetworkEnsemble(SequentialProcessor):
     """