From 1fe8e2041d91149bd246878d9b3626110250a5b5 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Sat, 14 Apr 2018 23:28:28 +0100
Subject: [PATCH 1/8] Fixing BN learning phase behaviour.

---
 keras/backend/cntk_backend.py       |  2 +-
 keras/backend/tensorflow_backend.py |  2 +-
 keras/backend/theano_backend.py     |  2 +-
 keras/layers/normalization.py       | 20 +++++++++++++++++++-
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/keras/backend/cntk_backend.py b/keras/backend/cntk_backend.py
index f6d7a149fdc..a9a2a7adf6f 100644
--- a/keras/backend/cntk_backend.py
+++ b/keras/backend/cntk_backend.py
@@ -69,7 +69,7 @@ def in_train_phase(x, alt, training=None):
         training = learning_phase()
         uses_learning_phase = True
     else:
-        uses_learning_phase = False
+        uses_learning_phase = getattr(training, '_uses_learning_phase', False)

     # CNTK currently don't support cond op, so here we use
     # element_select approach as workaround. It may have
diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py
index 392120fc8fd..d2b5753224b 100644
--- a/keras/backend/tensorflow_backend.py
+++ b/keras/backend/tensorflow_backend.py
@@ -2865,7 +2865,7 @@ def in_train_phase(x, alt, training=None):
         training = learning_phase()
         uses_learning_phase = True
     else:
-        uses_learning_phase = False
+        uses_learning_phase = getattr(training, '_uses_learning_phase', False)

     if training is 1 or training is True:
         if callable(x):
diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
index ec3c1b98862..cd4b7a44687 100644
--- a/keras/backend/theano_backend.py
+++ b/keras/backend/theano_backend.py
@@ -1496,7 +1496,7 @@ def in_train_phase(x, alt, training=None):
         training = learning_phase()
         uses_learning_phase = True
     else:
-        uses_learning_phase = False
+        uses_learning_phase = getattr(training, '_uses_learning_phase', False)

     if training is 1 or training is True:
         if callable(x):
diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py
index e762bfd11ca..ea48fb879ae 100644
--- a/keras/layers/normalization.py
+++ b/keras/layers/normalization.py
@@ -72,6 +72,8 @@ def __init__(self,
                  beta_constraint=None,
                  gamma_constraint=None,
                  **kwargs):
+        self._trainable = True
+        self._trainable_tensor = K.variable(1, dtype='int32', name='trainable')
         super(BatchNormalization, self).__init__(**kwargs)
         self.supports_masking = True
         self.axis = axis
@@ -88,6 +90,18 @@ def __init__(self,
         self.beta_constraint = constraints.get(beta_constraint)
         self.gamma_constraint = constraints.get(gamma_constraint)

+    @property
+    def trainable(self):
+        return self._trainable
+
+    @trainable.setter
+    def trainable(self, trainable):
+        trainable = bool(trainable)
+        # Speed it up by avoiding unnecessary set_value() calls
+        if self._trainable != trainable:
+            self._trainable = trainable
+            K.set_value(self._trainable_tensor, 1 if trainable else 0)
+
     def build(self, input_shape):
         dim = input_shape[self.axis]
         if dim is None:
@@ -171,9 +185,13 @@ def normalize_inference():
                 self.gamma,
                 epsilon=self.epsilon)

-        # If the learning phase is *static* and set to inference:
         if training in {0, False}:
+            # If the learning phase is *static* and set to inference:
             return normalize_inference()
+        elif training is None:
+            # Respect the learning phase when trainable, else run inference
+            training = K.switch(self._trainable_tensor, K.cast(K.learning_phase(), 'int32'), K.constant(0, dtype='int32'))
+            training._uses_learning_phase = True

         # If the learning is either dynamic, or set to training:
         normed_training, mean, variance = K.normalize_batch_in_training(
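What PATCH 1/8 buys us, as a minimal sketch mirroring the unit test added in
PATCH 5/8 (the input shape and random data here are illustrative, not taken
from the patch): a frozen BatchNormalization layer now uses its moving
statistics even when the learning phase is statically set to training.

    import numpy as np
    from keras import backend as K
    from keras.layers import Input
    from keras.layers.normalization import BatchNormalization
    from keras.models import Model

    x_in = Input(shape=(4,))
    model = Model(x_in, BatchNormalization()(x_in))
    model.layers[1].trainable = False  # freeze the BN layer

    K.set_learning_phase(1)  # static training phase
    out = model.predict(np.random.rand(8, 4))
    # With the fix, `out` is normalized with the moving mean/variance
    # rather than the mini-batch statistics, because the layer is frozen.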
From 6ce3f4526563f22c10da21b425fd351b1a534c90 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Sat, 14 Apr 2018 23:31:03 +0100
Subject: [PATCH 2/8] Fixing coding styles.

---
 keras/layers/normalization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py
index ea48fb879ae..68ad40e09c8 100644
--- a/keras/layers/normalization.py
+++ b/keras/layers/normalization.py
@@ -190,7 +190,8 @@ def normalize_inference():
             return normalize_inference()
         elif training is None:
             # Respect the learning phase when trainable, else run inference
-            training = K.switch(self._trainable_tensor, K.cast(K.learning_phase(), 'int32'), K.constant(0, dtype='int32'))
+            training = K.switch(self._trainable_tensor, K.cast(K.learning_phase(), 'int32'),
+                                K.constant(0, dtype='int32'))
             training._uses_learning_phase = True

         # If the learning is either dynamic, or set to training:

From 9c8f93e4ec52d1d9e0ee5f3f86e079c84ab27dc8 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Sat, 14 Apr 2018 23:35:15 +0100
Subject: [PATCH 3/8] Update comments.

---
 keras/layers/normalization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py
index 68ad40e09c8..edf984c1697 100644
--- a/keras/layers/normalization.py
+++ b/keras/layers/normalization.py
@@ -92,12 +92,13 @@ def __init__(self,

     @property
     def trainable(self):
+        # Use cached value to avoid unnecessary get_value() calls
         return self._trainable

     @trainable.setter
     def trainable(self, trainable):
         trainable = bool(trainable)
-        # Speed it up by avoiding unnecessary set_value() calls
+        # Update only when changed to avoid unnecessary set_value() calls
         if self._trainable != trainable:
             self._trainable = trainable
             K.set_value(self._trainable_tensor, 1 if trainable else 0)

From 2077d89d7398765e193f83ab465b9854c131e8df Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Sun, 15 Apr 2018 00:27:01 +0100
Subject: [PATCH 4/8] Switching to float32 to fix CNTK problems.

---
 keras/layers/normalization.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py
index edf984c1697..167698d869a 100644
--- a/keras/layers/normalization.py
+++ b/keras/layers/normalization.py
@@ -73,7 +73,7 @@ def __init__(self,
                  gamma_constraint=None,
                  **kwargs):
         self._trainable = True
-        self._trainable_tensor = K.variable(1, dtype='int32', name='trainable')
+        self._trainable_tensor = K.variable(1, dtype='float32', name='trainable')
         super(BatchNormalization, self).__init__(**kwargs)
         self.supports_masking = True
         self.axis = axis
@@ -191,8 +191,8 @@ def normalize_inference():
             return normalize_inference()
         elif training is None:
             # Respect the learning phase when trainable, else run inference
-            training = K.switch(self._trainable_tensor, K.cast(K.learning_phase(), 'int32'),
-                                K.constant(0, dtype='int32'))
+            training = K.switch(self._trainable_tensor, K.cast(K.learning_phase(), 'float32'),
+                                K.constant(0, dtype='float32'))
             training._uses_learning_phase = True

         # If the learning is either dynamic, or set to training:
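How the patched call() resolves `training` when it is None, restated as a
standalone sketch (dtypes as of PATCH 4/8; `trainable_tensor` stands in for
the layer's internal `_trainable_tensor`):

    from keras import backend as K

    trainable_tensor = K.variable(1, dtype='float32')  # layer.trainable == True
    training = K.switch(trainable_tensor,
                        K.cast(K.learning_phase(), 'float32'),  # follow the phase
                        K.constant(0, dtype='float32'))         # frozen -> inference
    # trainable == 1, phase == 1  ->  1 (use mini-batch statistics)
    # trainable == 1, phase == 0  ->  0 (use moving statistics)
    # trainable == 0, any phase   ->  0 (use moving statistics)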
From b1c62f51d5eed50af527796089a5b38b22e24576 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Sun, 15 Apr 2018 02:44:57 +0100
Subject: [PATCH 5/8] Adding unit-test.

---
 tests/keras/layers/normalization_test.py | 31 ++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tests/keras/layers/normalization_test.py b/tests/keras/layers/normalization_test.py
index a4c1c476c5f..bcc0c06c211 100644
--- a/tests/keras/layers/normalization_test.py
+++ b/tests/keras/layers/normalization_test.py
@@ -12,6 +12,7 @@
 input_1 = np.arange(10)
 input_2 = np.zeros(10)
 input_3 = np.ones((10))
+input_4 = np.expand_dims(np.arange(10.), axis=1)
 input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))]


@@ -223,5 +224,35 @@ def test_that_trainable_disables_updates():
     assert_allclose(x1, x2, atol=1e-7)


+@keras_test
+def test_batchnorm_trainable():
+    bn_mean = 0.5
+    bn_std = 10.
+
+    def get_model(bn_mean, bn_std):
+        input = Input(shape=(1,))
+        x = normalization.BatchNormalization(center=False, scale=False)(input)
+        model = Model(input, x)
+        model.set_weights([np.array([bn_mean]), np.array([bn_std**2])])
+        return model
+
+    # Simulates training-mode with trainable layer. Should use mini-batch statistics.
+    K.set_learning_phase(1)
+    model = get_model(bn_mean, bn_std)
+    model.layers[1].trainable = True
+    model.compile(loss='mse', optimizer='rmsprop')
+    out = model.predict(input_4)
+    assert_allclose((input_4 - np.mean(input_4)) / np.std(input_4), out, atol=1e-4)
+
+    # In all other cases we should use the moving mean and variance from BN.
+    for lp, trainable in [(1, False), (0, True), (0, False)]:
+        K.set_learning_phase(lp)
+        model = get_model(bn_mean, bn_std)
+        model.layers[1].trainable = trainable
+        model.compile(loss='mse', optimizer='rmsprop')
+        out = model.predict(input_4)
+        assert_allclose((input_4 - bn_mean) / bn_std, out, atol=1e-4)
+
+
 if __name__ == '__main__':
     pytest.main([__file__])

From fcc4592b56861012f3ca3236589b331cb85ad5a1 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Sun, 15 Apr 2018 03:56:05 +0100
Subject: [PATCH 6/8] Changing test to avoid bug on Theano.

---
 tests/keras/layers/normalization_test.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/keras/layers/normalization_test.py b/tests/keras/layers/normalization_test.py
index bcc0c06c211..63a9ae5e8ae 100644
--- a/tests/keras/layers/normalization_test.py
+++ b/tests/keras/layers/normalization_test.py
@@ -231,9 +231,10 @@ def test_batchnorm_trainable():

     def get_model(bn_mean, bn_std):
         input = Input(shape=(1,))
-        x = normalization.BatchNormalization(center=False, scale=False)(input)
+        x = normalization.BatchNormalization()(input)
         model = Model(input, x)
-        model.set_weights([np.array([bn_mean]), np.array([bn_std**2])])
+        model.set_weights([np.array([1.]), np.array([0.]),
+                           np.array([bn_mean]), np.array([bn_std**2])])
         return model

     # Simulates training-mode with trainable layer. Should use mini-batch statistics.
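For reference, the weight layout the reworked test relies on: with center and
scale enabled, BatchNormalization holds four weights, so set_weights() must
supply them in this order (a sketch restating the patched call, with the
layer's internal weight names in comments):

    model.set_weights([np.array([1.]),            # gamma (scale), identity
                       np.array([0.]),            # beta (center), no shift
                       np.array([bn_mean]),       # moving mean
                       np.array([bn_std ** 2])])  # moving variance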
From 6f0ddf0cf6262cb0a7a947880bdd24d0baf59fc4 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Tue, 17 Apr 2018 21:57:14 +0100
Subject: [PATCH 7/8] Changing atol value.

---
 tests/keras/layers/normalization_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/keras/layers/normalization_test.py b/tests/keras/layers/normalization_test.py
index 63a9ae5e8ae..851fa57b5dc 100644
--- a/tests/keras/layers/normalization_test.py
+++ b/tests/keras/layers/normalization_test.py
@@ -234,7 +234,7 @@ def get_model(bn_mean, bn_std):
         x = normalization.BatchNormalization()(input)
         model = Model(input, x)
         model.set_weights([np.array([1.]), np.array([0.]),
-                           np.array([bn_mean]), np.array([bn_std**2])])
+                           np.array([bn_mean]), np.array([bn_std ** 2])])
         return model

     # Simulates training-mode with trainable layer. Should use mini-batch statistics.
@@ -243,7 +243,7 @@ def get_model(bn_mean, bn_std):
     model.layers[1].trainable = True
     model.compile(loss='mse', optimizer='rmsprop')
     out = model.predict(input_4)
-    assert_allclose((input_4 - np.mean(input_4)) / np.std(input_4), out, atol=1e-4)
+    assert_allclose((input_4 - np.mean(input_4)) / np.std(input_4), out, atol=1e-3)

     # In all other cases we should use the moving mean and variance from BN.
     for lp, trainable in [(1, False), (0, True), (0, False)]:
@@ -252,7 +252,7 @@ def get_model(bn_mean, bn_std):
         model.layers[1].trainable = trainable
         model.compile(loss='mse', optimizer='rmsprop')
         out = model.predict(input_4)
-        assert_allclose((input_4 - bn_mean) / bn_std, out, atol=1e-4)
+        assert_allclose((input_4 - bn_mean) / bn_std, out, atol=1e-3)


 if __name__ == '__main__':

From dfeedba1e8fd12bbe6f94dfb113c7434fadf87f3 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Wed, 18 Apr 2018 00:04:39 +0100
Subject: [PATCH 8/8] Adding support for scalars in the int_shape() method of
 cntk backend.

---
 keras/backend/cntk_backend.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/keras/backend/cntk_backend.py b/keras/backend/cntk_backend.py
index 540025b338d..5c1131c9647 100644
--- a/keras/backend/cntk_backend.py
+++ b/keras/backend/cntk_backend.py
@@ -323,6 +323,9 @@ def is_sparse(tensor):


 def int_shape(x):
+    if type(x) in {int, float}:
+        return ()
+
     if hasattr(x, '_keras_shape'):
         return x._keras_shape
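Expected behaviour of the new fast path, as a rough sketch (the placeholder
call assumes the standard backend API; the return values are what the patch
implies on the CNTK backend, not outputs taken from a run):

    from keras import backend as K

    K.int_shape(3)     # ()  -- plain Python int, handled by the new branch
    K.int_shape(3.5)   # ()  -- plain Python float
    x = K.placeholder(shape=(2, 4))
    K.int_shape(x)     # (2, 4), via the existing tensor path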