From e88c21fdc0fe4242156d58b91a922686e57e9a6a Mon Sep 17 00:00:00 2001
From: Daniel Rasmussen
Date: Wed, 26 Apr 2023 14:42:06 -0300
Subject: [PATCH 1/5] Support unknown sequence lengths in LMUFeedforward

---
 CHANGES.rst                    | 12 ++++++
 keras_lmu/layers.py            | 52 +++++++++++++++++--------
 keras_lmu/tests/test_layers.py | 69 +++++++++++++++++++++++-----------
 3 files changed, 96 insertions(+), 37 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 9824b899..21196c70 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -24,6 +24,18 @@ Release history
 
 *Compatible with TensorFlow 2.4 - 2.11*
 
+**Changed**
+
+- ``LMUFeedforward`` can now be used with unknown sequence lengths, and ``LMU`` will
+  use ``LMUFeedforward`` for unknown sequence lengths (as long as the other conditions
+  are met, as before). (`#52`_)
+
+**Fixed**
+
+- Fixed errors when setting non-default dtype on LMU layers. (`#52`_)
+
+.. _#52: https://github.com/nengo/keras-lmu/pull/52
+
 0.5.0 (January 26, 2023)
 ========================
 
diff --git a/keras_lmu/layers.py b/keras_lmu/layers.py
index eb1c641d..4fc55187 100644
--- a/keras_lmu/layers.py
+++ b/keras_lmu/layers.py
@@ -1,5 +1,7 @@
 """Core classes for the KerasLMU package."""
 
+import warnings
+
 import numpy as np
 import tensorflow as tf
 from packaging import version
@@ -609,7 +611,6 @@ def build(self, input_shapes):
         if (
             not self.hidden_to_memory
             and not self.memory_to_memory
-            and input_shapes[1] is not None
             and not self.trainable_theta
         ):
             self.layer = LMUFeedforward(
@@ -626,6 +627,7 @@ def build(self, input_shapes):
                 bias_regularizer=self.bias_regularizer,
                 dropout=self.dropout,
                 return_sequences=self.return_sequences,
+                dtype=self.dtype,
             )
         else:
             self.layer = tf.keras.layers.RNN(
@@ -648,8 +650,10 @@ def build(self, input_shapes):
                     bias_regularizer=self.bias_regularizer,
                     dropout=self.dropout,
                     recurrent_dropout=self.recurrent_dropout,
+                    dtype=self.dtype,
                 ),
                 return_sequences=self.return_sequences,
+                dtype=self.dtype,
             )
 
         self.layer.build(input_shapes)
@@ -826,8 +830,10 @@ def __init__(
                 discretizer=discretizer,
                 kernel_initializer=None,
                 trainable=False,
+                dtype=self.dtype,
             ),
             return_sequences=True,
+            dtype=self.dtype,
         )
         self.impulse_response = None
         self.kernel = None
@@ -846,31 +852,37 @@ def build(self, input_shape):
 
         super().build(input_shape)
 
-        seq_len = input_shape[1]
         enc_d = input_shape[-1]
-
+        seq_len = input_shape[1]
         if seq_len is None:
-            # TODO: we could dynamically run the impulse response for longer if
-            #  needed using stateful=True
-            raise ValueError(
-                f"LMUFeedforward requires that the input shape's temporal axis be "
-                f"fully specified (got {seq_len})"
+            theta_factor = 5
+            warnings.warn(
+                f"Approximating unknown impulse length with {theta_factor}*theta; "
+                f"setting a fixed sequence length on inputs will remove the need for "
+                f"approximation"
             )
+            impulse_len = self.theta * theta_factor
+        else:
+            impulse_len = seq_len
 
-        impulse = tf.reshape(tf.eye(seq_len, 1), (1, -1, 1))
+        impulse = tf.reshape(tf.eye(impulse_len, 1), (1, -1, 1))
 
         self.impulse_response = tf.squeeze(
            self.delay_layer(impulse, training=False), axis=0
        )
 
         if self.conv_mode == "fft":
-            self.impulse_response = tf.signal.rfft(
-                tf.transpose(self.impulse_response),
-                fft_length=[2 * seq_len],
+            self.impulse_response_fft = (
+                None
+                if seq_len is None
+                else tf.signal.rfft(
+                    tf.transpose(self.impulse_response),
+                    fft_length=[2 * seq_len],
+                )
             )
         else:
             if self.truncate_ir is not None:
-                assert self.impulse_response.shape == (seq_len, self.order)
+                assert self.impulse_response.shape == (impulse_len, self.order)
 
                 cumsum = tf.math.cumsum(
                     tf.math.abs(self.impulse_response), axis=0, reverse=True
@@ -955,7 +967,9 @@ def call(self, inputs, training=None):
             h = h_in if self.return_sequences else h_in[:, -1]
         elif hasattr(self.hidden_cell, "state_size"):
             h = tf.keras.layers.RNN(
-                self.hidden_cell, return_sequences=self.return_sequences
+                self.hidden_cell,
+                return_sequences=self.return_sequences,
+                dtype=self.dtype,
             )(h_in, training=training)
         else:
             if not self.return_sequences:
@@ -977,9 +991,17 @@ def _fft_convolution(self, u):
         # Pad sequences to avoid circular convolution
         # Perform the FFT
         fft_input = tf.signal.rfft(u, fft_length=[2 * seq_len])
+        impulse_response = (
+            tf.signal.rfft(
+                tf.transpose(self.impulse_response[:seq_len]),
+                fft_length=[2 * seq_len],
+            )
+            if self.impulse_response_fft is None
+            else self.impulse_response_fft
+        )
 
         # Elementwise product of FFT (with broadcasting)
-        result = tf.expand_dims(fft_input, axis=-2) * self.impulse_response
+        result = tf.expand_dims(fft_input, axis=-2) * impulse_response
 
         # Inverse FFT
         m = tf.signal.irfft(result, fft_length=[2 * seq_len])[..., :seq_len]
diff --git a/keras_lmu/tests/test_layers.py b/keras_lmu/tests/test_layers.py
index d85c4453..5f8a508b 100644
--- a/keras_lmu/tests/test_layers.py
+++ b/keras_lmu/tests/test_layers.py
@@ -217,40 +217,48 @@ def test_save_load_serialization(mode, tmp_path, trainable_theta, discretizer):
     "hidden_cell",
     (
         lambda: None,
-        lambda: tf.keras.layers.Dense(4),
-        lambda: tf.keras.layers.SimpleRNNCell(4),
+        lambda: tf.keras.layers.Dense(4, dtype="float64"),
+        lambda: tf.keras.layers.SimpleRNNCell(4, dtype="float64"),
     ),
 )
-@pytest.mark.parametrize("memory_d", [1, 4])
 @pytest.mark.parametrize("discretizer", ("zoh", "euler"))
 @pytest.mark.parametrize("conv_mode", ["fft", "raw"])
+@pytest.mark.parametrize("seq_len", [10, 50])
 def test_feedforward(
-    return_sequences, hidden_cell, memory_d, discretizer, conv_mode, rng
+    return_sequences, hidden_cell, discretizer, conv_mode, seq_len, rng, seed
 ):
     kwargs = {
-        "memory_d": memory_d,
+        "memory_d": 4,
         "order": 2,
-        "theta": 12,
+        "theta": 5,
         "hidden_cell": hidden_cell(),
         "discretizer": discretizer,
+        "dtype": "float64",
     }
 
-    x = rng.uniform(-1, 1, size=(2, 10, 32))
+    x = rng.uniform(-1, 1, size=(2, seq_len, 32))
 
     rnn_layer = tf.keras.layers.RNN(
         layers.LMUCell(**kwargs),
         return_sequences=return_sequences,
+        dtype="float64",
     )
     rnn_out = rnn_layer(x)
 
     ff_layer = layers.LMUFeedforward(
         return_sequences=return_sequences, conv_mode=conv_mode, **kwargs
     )
-    ff_layer.build(x.shape)
-    ff_layer.kernel.assign(rnn_layer.cell.kernel)
+    ff_layer.build((2, None, 32))  # testing with unknown sequence length
+    ff_layer.set_weights(rnn_layer.get_weights())
     ff_out = ff_layer(x, training=None)
 
-    assert np.allclose(rnn_out, ff_out, atol=2e-6)
+    assert ff_out.dtype == rnn_out.dtype == "float64"
+    assert np.allclose(
+        rnn_out,
+        ff_out,
+        # larger error for longer sequences, where the error accumulates more
+        atol=5e-8 if seq_len < 50 else 5e-4,
+    ), np.max(abs(rnn_out - ff_out))
 
 
 @pytest.mark.parametrize("truncate_ir", [None, 1e-5, 1e-4, 1e-3])
@@ -284,7 +292,7 @@ def test_raw_truncation(truncate_ir, rng):
 def test_validation_errors():
     ff_layer = layers.LMUFeedforward(1, 2, 3, None)
 
-    with pytest.raises(ValueError, match="temporal axis be fully specified"):
+    with pytest.warns(UserWarning, match="unknown impulse length"):
         ff_layer(tf.keras.Input((None, 32)))
 
     with pytest.raises(ValueError, match="hidden_to_memory must be False"):
@@ -301,18 +309,16 @@
 
 
 @pytest.mark.parametrize(
-    "should_use_feedforward, hidden_to_memory, memory_to_memory, steps, "
-    "trainable_theta",
+    "should_use_feedforward, hidden_to_memory, memory_to_memory, trainable_theta",
     [
-        (True, False, False, 5, False),
-        (False, True, False, 5, False),
-        (False, False, True, 5, False),
-        (False, False, False, None, False),
-        (False, False, False, 5, True),
+        (True, False, False, False),
+        (False, True, False, False),
+        (False, False, True, False),
+        (False, False, False, True),
     ],
 )
 def test_feedforward_auto_swap(
-    should_use_feedforward, hidden_to_memory, memory_to_memory, steps, trainable_theta
+    should_use_feedforward, hidden_to_memory, memory_to_memory, trainable_theta
 ):
     lmu = layers.LMU(
         4,
@@ -323,7 +329,7 @@ def test_feedforward_auto_swap(
         memory_to_memory=memory_to_memory,
         trainable_theta=trainable_theta,
     )
-    lmu.build((32, steps, 8))
+    lmu.build((32, None, 8))
 
     assert isinstance(lmu.layer, layers.LMUFeedforward) == should_use_feedforward
@@ -370,7 +376,7 @@ def test_hidden_types(hidden_cell, feedforward, rng):
     lmu_output = lmu(x)
 
     assert np.allclose(
-        lmu_output, base_output, atol=2e-6 if feedforward else 1e-8
+        lmu_output, base_output, atol=3e-6 if feedforward else 1e-8
     ), np.max(np.abs(lmu_output - base_output))
@@ -505,7 +511,7 @@ def test_fit(feedforward, discretizer, trainable_theta):
         kernel_initializer="zeros",
     )
 
-    inputs = tf.keras.layers.Input((5 if feedforward else None, 10))
+    inputs = tf.keras.layers.Input((None, 10))
     lmu = lmu_layer(inputs)
     outputs = tf.keras.layers.Dense(2)(lmu)
 
@@ -737,3 +743,22 @@ def test_get_config(cls):
     for key in sig.parameters:
         if key not in ("self", "kwargs"):
             assert key in config, key
+
+
+@pytest.mark.parametrize("feedforward", [True, False])
+@pytest.mark.parametrize("dtype", [None, "float32", "float64"])
+def test_dtype(feedforward, dtype):
+    x = np.ones((2, 10, 5))
+    layer = layers.LMU(
+        1,
+        2,
+        3,
+        tf.keras.layers.SimpleRNNCell(4, dtype=dtype),
+        trainable_theta=not feedforward,
+        dtype=dtype,
+    )
+    y = layer(x)
+    assert isinstance(
+        layer.layer, layers.LMUFeedforward if feedforward else tf.keras.layers.RNN
+    )
+    assert y.dtype == ("float32" if dtype is None else dtype)

From 1ba45b577fa59b93fd694caaca5c775b1e3ec71f Mon Sep 17 00:00:00 2001
From: Daniel Rasmussen
Date: Wed, 26 Apr 2023 14:43:11 -0300
Subject: [PATCH 2/5] Updates for latest pylint

---
 keras_lmu/layers.py            | 26 +++++++++++++++++++-------
 keras_lmu/tests/test_layers.py |  3 +--
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/keras_lmu/layers.py b/keras_lmu/layers.py
index 4fc55187..698d2ed8 100644
--- a/keras_lmu/layers.py
+++ b/keras_lmu/layers.py
@@ -230,7 +230,7 @@ def _cont2discrete_zoh(A, B):
     """
 
     # combine A/B and pad to make square matrix
-    em_upper = tf.concat([A, B], axis=0)
+    em_upper = tf.concat([A, B], axis=0)  # pylint: disable=no-value-for-parameter
     em = tf.pad(em_upper, [(0, 0), (0, B.shape[0])])
 
     # compute matrix exponential
@@ -334,7 +334,11 @@ def call(self, inputs, states, training=None):  # noqa: C901
         m = states[-1]
 
         # compute memory input
-        u = tf.concat((inputs, h[0]), axis=1) if self.hidden_to_memory else inputs
+        u = (
+            tf.concat((inputs, h[0]), axis=1)  # pylint: disable=no-value-for-parameter
+            if self.hidden_to_memory
+            else inputs
+        )
         if self.dropout > 0:
             u *= self.get_dropout_mask_for_cell(u, training)
         if self.kernel is not None:
@@ -383,7 +387,11 @@ def call(self, inputs, states, training=None):  # noqa: C901
         m = tf.reshape(m, (-1, self.memory_d * self.order))
 
         # apply hidden cell
-        h_in = tf.concat((m, inputs), axis=1) if self.input_to_hidden else m
+        h_in = (
+            tf.concat((m, inputs), axis=1)  # pylint: disable=no-value-for-parameter
+            if self.input_to_hidden
+            else m
+        )
 
         if self.hidden_cell is None:
             o = h_in
@@ -595,7 +603,7 @@ def theta(self):
 
         return self._init_theta
 
-    def build(self, input_shapes):
+    def build(self, input_shape):
         """
         Builds the layer.
 
@@ -606,7 +614,7 @@ def build(self, input_shapes):
         with some additional bookkeeping.
         """
 
-        super().build(input_shapes)
+        super().build(input_shape)
 
         if (
             not self.hidden_to_memory
@@ -656,7 +664,7 @@ def build(self, input_shapes):
             dtype=self.dtype,
         )
 
-        self.layer.build(input_shapes)
+        self.layer.build(input_shape)
 
     def call(self, inputs, training=None):
         """
@@ -961,7 +969,11 @@ def call(self, inputs, training=None):
             m = self._raw_convolution(u)
 
         # apply hidden cell
-        h_in = tf.concat((m, inputs), axis=-1) if self.input_to_hidden else m
+        h_in = (
+            tf.concat((m, inputs), axis=-1)  # pylint: disable=no-value-for-parameter
+            if self.input_to_hidden
+            else m
+        )
 
         if self.hidden_cell is None:
             h = h_in if self.return_sequences else h_in[:, -1]
diff --git a/keras_lmu/tests/test_layers.py b/keras_lmu/tests/test_layers.py
index 5f8a508b..58f92258 100644
--- a/keras_lmu/tests/test_layers.py
+++ b/keras_lmu/tests/test_layers.py
@@ -734,8 +734,7 @@ def test_regularizer_loss(fft, bias):
 def test_get_config(cls):
     """Test that all ``__init__`` arguments appear in the ``get_config`` dictionary."""
 
-    params = {"memory_d": 2, "order": 5, "theta": 3.2, "hidden_cell": None}
-    obj = cls(**params)
+    obj = cls(memory_d=2, order=5, theta=3.2, hidden_cell=None)
     config = obj.get_config()
 
     sig = inspect.signature(cls.__init__)

From de4cfd8ad9cff006dd9226c9ead71acd99729bfe Mon Sep 17 00:00:00 2001
From: Daniel Rasmussen
Date: Wed, 26 Apr 2023 15:54:17 -0300
Subject: [PATCH 3/5] Set fixed cudnn version

The latest cudnn is not compatible with the old nvidia drivers installed in
the azure VMs, and we can't update the nvidia drivers without updating the
whole OS at this point.
---
 .nengobones.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.nengobones.yml b/.nengobones.yml
index bf58f7d5..5b702e47 100644
--- a/.nengobones.yml
+++ b/.nengobones.yml
@@ -81,7 +81,7 @@ ci_scripts:
       TF_FORCE_GPU_ALLOW_GROWTH: "true"
       TF_VERSION: $TF_VERSION
     remote_setup:
-      - micromamba install -y "$TF_VERSION"
+      - micromamba install -y "$TF_VERSION" cudnn=8.4
   - template: remote-script
     remote_script: docs
     output_name: remote-docs
@@ -89,7 +89,7 @@ ci_scripts:
     azure_name: nengo-dl-docs
     azure_group: nengo-ci
     remote_setup:
-      - micromamba install -y "$TF_VERSION"
+      - micromamba install -y "$TF_VERSION" cudnn=8.4
   - template: remote-script
     remote_script: examples
     output_name: remote-examples
@@ -97,7 +97,7 @@ ci_scripts:
     azure_name: nengo-dl-examples
     azure_group: nengo-ci
     remote_setup:
-      - micromamba install -y "$TF_VERSION"
+      - micromamba install -y "$TF_VERSION" cudnn=8.4
   - template: deploy
     wheel: true

From 41fdc58ecc3ee13068e835a048e80ea433a7822c Mon Sep 17 00:00:00 2001
From: Daniel Rasmussen
Date: Mon, 1 May 2023 16:29:20 -0300
Subject: [PATCH 4/5] Allow input_to_hidden with hidden_cell=None

Nothing technically wrong with this, and can be used for skip connections.
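E.g., a minimal sketch of the skip-connection use (the layer sizes here are
arbitrary, chosen just for illustration):

    import numpy as np
    import keras_lmu

    # with hidden_cell=None and input_to_hidden=True, the layer output is the
    # LMU memory concatenated with the raw input (a skip connection), rather
    # than the memory alone
    layer = keras_lmu.LMU(
        memory_d=1,
        order=4,
        theta=10,
        hidden_cell=None,
        input_to_hidden=True,
    )
    y = layer(np.ones((2, 10, 5)))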
---
 CHANGES.rst                    |  2 ++
 keras_lmu/layers.py            | 10 ++++------
 keras_lmu/tests/test_layers.py |  6 ------
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 21196c70..756831d5 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -29,6 +29,8 @@ Release history
 - ``LMUFeedforward`` can now be used with unknown sequence lengths, and ``LMU`` will
   use ``LMUFeedforward`` for unknown sequence lengths (as long as the other conditions
   are met, as before). (`#52`_)
+- Allow ``input_to_hidden=True`` with ``hidden_cell=None``. This will act as a skip
+  connection. (`#52`_)
 
 **Fixed**
 
diff --git a/keras_lmu/layers.py b/keras_lmu/layers.py
index 698d2ed8..8e8f579e 100644
--- a/keras_lmu/layers.py
+++ b/keras_lmu/layers.py
@@ -156,9 +156,10 @@ def __init__(
         )
 
         if self.hidden_cell is None:
-            for conn in ("hidden_to_memory", "input_to_hidden"):
-                if getattr(self, conn):
-                    raise ValueError(f"{conn} must be False if hidden_cell is None")
+            if self.hidden_to_memory:
+                raise ValueError(
+                    "hidden_to_memory must be False if hidden_cell is None"
+                )
 
             self.hidden_output_size = self.memory_d * self.order
             self.hidden_state_size = []
@@ -802,9 +803,6 @@ def __init__(
     ):
         super().__init__(**kwargs)
 
-        if input_to_hidden and hidden_cell is None:
-            raise ValueError("input_to_hidden must be False if hidden_cell is None")
-
         if conv_mode not in ("fft", "raw"):
             raise ValueError(f"Unrecognized conv mode '{conv_mode}'")
 
diff --git a/keras_lmu/tests/test_layers.py b/keras_lmu/tests/test_layers.py
index 58f92258..5a96ce9c 100644
--- a/keras_lmu/tests/test_layers.py
+++ b/keras_lmu/tests/test_layers.py
@@ -298,12 +298,6 @@ def test_validation_errors():
     with pytest.raises(ValueError, match="hidden_to_memory must be False"):
         layers.LMUCell(1, 2, 3, None, hidden_to_memory=True)
 
-    with pytest.raises(ValueError, match="input_to_hidden must be False"):
-        layers.LMUCell(1, 2, 3, None, input_to_hidden=True)
-
-    with pytest.raises(ValueError, match="input_to_hidden must be False"):
-        layers.LMUFeedforward(1, 2, 3, None, input_to_hidden=True)
-
     with pytest.raises(ValueError, match="Unrecognized conv mode"):
         layers.LMUFeedforward(1, 2, 3, None, conv_mode="raw_bad")
 

From a8187333ee1b7f07a31acef3c61f17c79828bfea Mon Sep 17 00:00:00 2001
From: Daniel Rasmussen
Date: Tue, 2 May 2023 18:28:07 -0300
Subject: [PATCH 5/5] Swap order of LMU states

Having the memory state come first is more intuitive, as it is always present
and comes first in the computational flow.
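For illustration, a minimal sketch of the new ordering (the cell sizes here
are arbitrary):

    import tensorflow as tf
    import keras_lmu

    cell = keras_lmu.LMUCell(
        memory_d=1, order=4, theta=10, hidden_cell=tf.keras.layers.SimpleRNNCell(3)
    )
    # the memory state (memory_d * order entries) now precedes the hidden
    # cell's state(s), e.g. [4, 3] here rather than the old [3, 4]
    print(cell.state_size)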
---
 .nengobones.yml      |  4 ++--
 CHANGES.rst          |  4 +++-
 keras_lmu/layers.py  | 14 +++++++-------
 keras_lmu/version.py |  2 +-
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/.nengobones.yml b/.nengobones.yml
index 5b702e47..87e01e73 100644
--- a/.nengobones.yml
+++ b/.nengobones.yml
@@ -108,6 +108,6 @@ pyproject_toml: {}
 version_py:
   type: semver
   major: 0
-  minor: 5
-  patch: 1
+  minor: 6
+  patch: 0
   release: false
diff --git a/CHANGES.rst b/CHANGES.rst
index 756831d5..49079826 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -19,7 +19,7 @@ Release history
 - Removed
 - Fixed
 
-0.5.1 (unreleased)
+0.6.0 (unreleased)
 ==================
 
 *Compatible with TensorFlow 2.4 - 2.11*
@@ -31,6 +31,8 @@ Release history
   are met, as before). (`#52`_)
 - Allow ``input_to_hidden=True`` with ``hidden_cell=None``. This will act as a skip
   connection. (`#52`_)
+- Changed order of LMU states so that the LMU memory state always comes first, and
+  any states from the hidden cell come afterwards. (`#52`_)
 
 **Fixed**
 
diff --git a/keras_lmu/layers.py b/keras_lmu/layers.py
index 8e8f579e..62999175 100644
--- a/keras_lmu/layers.py
+++ b/keras_lmu/layers.py
@@ -171,9 +171,9 @@ def __init__(
             self.hidden_output_size = self.hidden_cell.units
             self.hidden_state_size = [self.hidden_cell.units]
 
-        self.state_size = tf.nest.flatten(self.hidden_state_size) + [
-            self.memory_d * self.order
-        ]
+        self.state_size = [self.memory_d * self.order] + tf.nest.flatten(
+            self.hidden_state_size
+        )
         self.output_size = self.hidden_output_size
 
     @property
@@ -329,10 +329,10 @@ def call(self, inputs, states, training=None):  # noqa: C901
 
         states = tf.nest.flatten(states)
 
-        # state for the hidden cell
-        h = states[:-1]
         # state for the LMU memory
-        m = states[-1]
+        m = states[0]
+        # state for the hidden cell
+        h = states[1:]
 
         # compute memory input
         u = (
@@ -403,7 +403,7 @@ def call(self, inputs, states, training=None):  # noqa: C901
             o = self.hidden_cell(h_in, training=training)
             h = [o]
 
-        return o, h + [m]
+        return o, [m] + h
 
     def reset_dropout_mask(self):
         """Reset dropout mask for memory and hidden components."""
diff --git a/keras_lmu/version.py b/keras_lmu/version.py
index b76cccaf..fcf80bf8 100644
--- a/keras_lmu/version.py
+++ b/keras_lmu/version.py
@@ -11,7 +11,7 @@
 tagged with the version.
 """
 
-version_info = (0, 5, 1)
+version_info = (0, 6, 0)
 
 name = "keras-lmu"
 dev = 0