From 118055056f7b300fadc564a641229b8ed8bb79cc Mon Sep 17 00:00:00 2001
From: henrymoss
Date: Thu, 15 Jul 2021 09:06:09 +0100
Subject: [PATCH 1/4] first gop

---
 tests/unit/models/test_model_interfaces.py | 120 +++++++++++++++++----
 trieste/models/model_interfaces.py         |  65 +++++++----
 2 files changed, 148 insertions(+), 37 deletions(-)

diff --git a/tests/unit/models/test_model_interfaces.py b/tests/unit/models/test_model_interfaces.py
index 317878890c..8b1cb7ae1d 100644
--- a/tests/unit/models/test_model_interfaces.py
+++ b/tests/unit/models/test_model_interfaces.py
@@ -361,9 +361,10 @@ def test_gaussian_process_regression_pairwise_covariance(gpr_interface_factory)
 @unittest.mock.patch(
     "trieste.models.model_interfaces.GaussianProcessRegression.find_best_model_initialization"
 )
-@pytest.mark.parametrize("d", [1, 2])
-def test_gaussian_process_regression_correctly_counts_num_trainable_params_with_priors(
-    mocked_model_initializer, d, gpr_interface_factory
+@pytest.mark.parametrize("d", [1, 10])
+@pytest.mark.parametrize("prior_for_lengthscale", [True, False])
+def test_gaussian_process_regression_correctly_counts_params_that_can_be_sampled(
+    mocked_model_initializer, d, prior_for_lengthscale, gpr_interface_factory
 ) -> None:
     x = tf.constant(np.arange(1, 5 * d + 1).reshape(-1, d), dtype=tf.float64)  # shape: [5, d]
     model = gpr_interface_factory(x, _3x_plus_10(x))
@@ -371,9 +372,18 @@ def test_gaussian_process_regression_correctly_counts_num_trainable_params_with_
     model.model.likelihood.variance.assign(1.0)
     gpflow.set_trainable(model.model.likelihood, True)
 
-    model.model.kernel.lengthscales.prior = tfp.distributions.LogNormal(
-        loc=tf.math.log(model.model.kernel.lengthscales), scale=tf.cast(1.0, dtype=tf.float64)
-    )
+    if prior_for_lengthscale:
+        model.model.kernel.lengthscales.prior = tfp.distributions.LogNormal(
+            loc=tf.math.log(model.model.kernel.lengthscales), scale=1.0
+        )
+
+    else:
+        upper = tf.cast([10.0] * d, dtype=tf.float64)
+        lower = upper / 100
+        model.model.kernel.lengthscales = gpflow.Parameter(
+            model.model.kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper)
+        )
+
     model.model.likelihood.variance.prior = tfp.distributions.LogNormal(
         loc=tf.cast(-2.0, dtype=tf.float64), scale=tf.cast(5.0, dtype=tf.float64)
     )
@@ -389,12 +399,13 @@ def test_gaussian_process_regression_correctly_counts_num_trainable_params_with_
     npt.assert_array_equal(num_samples, tf.minimum(1000, 100 * (d + 1)))
 
 
-def test_find_best_model_initialization_only_changes_params_with_priors(
-    gpr_interface_factory,
+@pytest.mark.parametrize("dim", [1, 10])
+def test_find_best_model_initialization_changes_params_with_priors(
+    gpr_interface_factory, dim: int
 ) -> None:
     x = tf.constant(np.arange(1, 5).reshape(-1, 1), dtype=gpflow.default_float())  # shape: [4, 1]
     model = gpr_interface_factory(x, _3x_plus_10(x))
-    model.model.kernel = gpflow.kernels.RBF()
+    model.model.kernel = gpflow.kernels.RBF(lengthscales=[0.2] * dim)
 
     if isinstance(model, (VariationalGaussianProcess, SparseVariational)):
         pytest.skip("find_best_model_initialization is only implemented for the GPR models.")
@@ -406,27 +417,62 @@
     model.find_best_model_initialization(2)
 
     npt.assert_allclose(1.0, model.model.kernel.variance)
+    npt.assert_array_equal(dim, model.model.kernel.lengthscales.shape)
+    npt.assert_raises(
+        AssertionError, npt.assert_allclose, [0.2] * dim, model.model.kernel.lengthscales
+    )
+
+
+@pytest.mark.parametrize("dim", [1, 10])
+def test_find_best_model_initialization_changes_params_with_sigmoid_bijectors(
+    gpr_interface_factory, dim: int
+) -> None:
+    x = tf.constant(np.arange(1, 5).reshape(-1, 1), dtype=gpflow.default_float())  # shape: [4, 1]
+    model = gpr_interface_factory(x, _3x_plus_10(x))
+    model.model.kernel = gpflow.kernels.RBF(lengthscales=[0.2] * dim)
+
+    if isinstance(model, (VariationalGaussianProcess, SparseVariational)):
+        pytest.skip("find_best_model_initialization is only implemented for the GPR models.")
+
+    upper = tf.cast([10.0] * dim, dtype=tf.float64)
+    lower = upper / 100
+    model.model.kernel.lengthscales = gpflow.Parameter(
+        model.model.kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper)
+    )
+
+    model.find_best_model_initialization(2)
+
+    npt.assert_allclose(1.0, model.model.kernel.variance)
+    npt.assert_array_equal(dim, model.model.kernel.lengthscales.shape)
     npt.assert_raises(
         AssertionError, npt.assert_allclose, [0.2, 0.2], model.model.kernel.lengthscales
     )
 
 
 @random_seed
-def test_find_best_model_initialization_improves_likelihood(gpr_interface_factory) -> None:
+@pytest.mark.parametrize("dim", [1, 10])
+def test_find_best_model_initialization_improves_likelihood(
+    gpr_interface_factory, dim: int
+) -> None:
     x = tf.constant(np.arange(1, 10).reshape(-1, 1), dtype=gpflow.default_float())  # shape: [9, 1]
     model = gpr_interface_factory(x, _3x_plus_10(x))
-    model.model.kernel = gpflow.kernels.RBF()
+    model.model.kernel = gpflow.kernels.RBF(variance=1.0, lengthscales=[0.2] * dim)
 
     if isinstance(model, (VariationalGaussianProcess, SparseVariational)):
         pytest.skip("find_best_model_initialization is only implemented for the GPR models.")
 
-    model.model.kernel.lengthscales.prior = tfp.distributions.LogNormal(
-        loc=tf.math.log(model.model.kernel.lengthscales), scale=1.0
+    model.model.kernel.variance.prior = tfp.distributions.LogNormal(
+        loc=np.float64(-2.0), scale=np.float64(1.0)
+    )
+    upper = tf.cast([10.0] * dim, dtype=tf.float64)
+    lower = upper / 100
+    model.model.kernel.lengthscales = gpflow.Parameter(
+        model.model.kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper)
     )
 
-    pre_init_likelihood = model.model.maximum_log_likelihood_objective()
+    pre_init_likelihood = -model.model.training_loss()
     model.find_best_model_initialization(10)
-    post_init_likelihood = model.model.maximum_log_likelihood_objective()
+    post_init_likelihood = -model.model.training_loss()
 
     npt.assert_array_less(pre_init_likelihood, post_init_likelihood)
@@ -660,13 +706,51 @@ def test_sparse_variational_optimize(batcher, compile: bool) -> None:
 
 
 @random_seed
-def test_randomize_model_hyperparameters_only_randomize_kernel_parameters_with_priors() -> None:
-    kernel = gpflow.kernels.RBF(variance=1.0, lengthscales=[0.2, 0.2])
+@pytest.mark.parametrize("dim", [1, 10])
+def test_randomize_model_hyperparameters_randomizes_kernel_parameters_with_priors(dim: int) -> None:
+    kernel = gpflow.kernels.RBF(variance=1.0, lengthscales=[0.2] * dim)
     kernel.lengthscales.prior = tfp.distributions.LogNormal(
         loc=tf.math.log(kernel.lengthscales), scale=1.0
     )
+    randomize_model_hyperparameters(kernel)
+
+    npt.assert_allclose(1.0, kernel.variance)
+    npt.assert_array_equal(dim, kernel.lengthscales.shape)
+    npt.assert_raises(AssertionError, npt.assert_allclose, [0.2] * dim, kernel.lengthscales)
+
+
+@random_seed
+@pytest.mark.parametrize("dim", [1, 10])
+def test_randomize_model_hyperparameters_randomizes_constrained_kernel_parameters(dim: int) -> None:
+    kernel = gpflow.kernels.RBF(variance=1.0, lengthscales=[0.2] * dim)
+    upper = tf.cast([10.0] * dim, dtype=tf.float64)
+    lower = upper / 100
+    kernel.lengthscales = gpflow.Parameter(
+        kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper)
+    )
 
     randomize_model_hyperparameters(kernel)
 
     npt.assert_allclose(1.0, kernel.variance)
-    npt.assert_raises(AssertionError, npt.assert_allclose, [0.2, 0.2], kernel.lengthscales)
+    npt.assert_array_equal(dim, kernel.lengthscales.shape)
+    npt.assert_raises(AssertionError, npt.assert_allclose, [0.2] * dim, kernel.lengthscales)
+
+
+@random_seed
+@pytest.mark.parametrize("dim", [1, 10])
+def test_randomize_model_hyperparameters_randomizes_kernel_parameters_with_constraints_or_priors(
+    dim: int,
+) -> None:
+    kernel = gpflow.kernels.RBF(variance=1.0, lengthscales=[0.2] * dim)
+    upper = tf.cast([10.0] * dim, dtype=tf.float64)
+    lower = upper / 100
+    kernel.lengthscales = gpflow.Parameter(
+        kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper)
+    )
+    kernel.variance.prior = tfp.distributions.LogNormal(loc=np.float64(-2.0), scale=np.float64(1.0))
+
+    randomize_model_hyperparameters(kernel)
+
+    npt.assert_raises(AssertionError, npt.assert_allclose, 1.0, kernel.variance)
+    npt.assert_array_equal(dim, kernel.lengthscales.shape)
+    npt.assert_raises(AssertionError, npt.assert_allclose, [0.2] * dim, kernel.lengthscales)
diff --git a/trieste/models/model_interfaces.py b/trieste/models/model_interfaces.py
index 6a52415e7f..4835e978cf 100644
--- a/trieste/models/model_interfaces.py
+++ b/trieste/models/model_interfaces.py
@@ -20,6 +20,7 @@
 
 import gpflow
 import tensorflow as tf
+import tensorflow_probability as tfp
 from gpflow.models import GPR, SGPR, SVGP, VGP, GPModel
 from gpflow.utilities import multiple_assign, read_values
 
@@ -424,30 +425,47 @@ def optimize(self, dataset: Dataset) -> None:
         """
         Optimize the model with the specified `dataset`.
 
-        If any of the kernel's trainable parameters have priors, then we begin model optimization
-        from the best of a random sample from these parameters' priors. For trainable parameters
-        without priors, we begin optimization from their initial values.
+        For :class:`GaussianProcessRegression`, we (optionally) try multiple randomly sampled
+        kernel parameter configurations as well as the configuration specified when initializing
+        the kernel. The best configuration is used as the starting point for model optimization.
+
+        For trainable parameters constrained to lie in a finite interval (through a sigmoid
+        bijector), we begin model optimization from the best of a random sample from these
+        parameters' acceptable domains.
+
+        For trainable parameters without constraints but with priors, we begin model optimization
+        from the best of a random sample from these parameters' priors.
+
+        For trainable parameters with neither priors or constraints, we begin optimization from
+        their initial values.
 
         :param dataset: The data with which to optimize the `model`.
""" - num_trainable_params_with_priors = tf.reduce_sum( - [tf.size(param) for param in self.model.trainable_parameters if param.prior is not None] + num_trainable_params_with_priors_or_constraints = tf.reduce_sum( + [ + tf.size(param) + for param in self.model.trainable_parameters + if param.prior is not None or isinstance(param.bijector, tfp.bijectors.Sigmoid) + ] ) - if num_trainable_params_with_priors >= 1: # Find a promising kernel initialization - num_prior_samples = tf.minimum(1000, 100 * num_trainable_params_with_priors) - self.find_best_model_initialization(num_prior_samples) + if ( + num_trainable_params_with_priors_or_constraints >= 1 + ): # Find a promising kernel initialization + num_kernel_samples = tf.minimum( + 1000, 100 * num_trainable_params_with_priors_or_constraints + ) + self.find_best_model_initialization(num_kernel_samples) self.optimizer.optimize(self.model, dataset) - def find_best_model_initialization(self, num_prior_samples) -> None: + def find_best_model_initialization(self, num_kernel_samples) -> None: """ - Test `num_prior_samples` models with kernel parameters sampled from their - priors. The model's kernel parameters are then set to those achieving maximal - likelihood across the sample. + Test `num_kernel_samples` models with sampled kernel parameters. The model's kernel + parameters are then set to the sample achieving maximal likelihood. - :param num_prior_samples: Number of randomly sampled kernels to evaluate. + :param num_kernel_samples: Number of randomly sampled kernels to evaluate. """ @tf.function @@ -460,10 +478,10 @@ def evaluate_likelihood_of_model_parameters() -> tf.Tensor: self.model.maximum_log_likelihood_objective() + self.model.log_prior_density() ) - for _ in tf.range(num_prior_samples): + for _ in tf.range(num_kernel_samples): try: log_likelihood = evaluate_likelihood_of_model_parameters() - except tf.errors.InvalidArgumentError: # allow badly specified priors + except tf.errors.InvalidArgumentError: # allow badly specified kernel params log_likelihood = -1e100 if log_likelihood > max_log_likelihood: # only keep best kernel params @@ -589,10 +607,19 @@ def _assert_data_is_compatible(new_data: Dataset, existing_data: Dataset) -> Non def randomize_model_hyperparameters(model: gpflow.models.GPModel) -> None: """ - Sets hyperparameters to random samples from their prior distributions. + Sets hyperparameters to random samples from their constrained domains or (if not constraints + are available) their prior distributions. :param model: Any GPModel from gpflow. 
""" - for parameter in model.trainable_parameters: - if parameter.prior is not None: - parameter.assign(parameter.prior.sample()) + for param in model.trainable_parameters: + if isinstance(param.bijector, tfp.bijectors.Sigmoid): + sample = tf.random.uniform( + [1], + minval=param.bijector.low, + maxval=param.bijector.high, + dtype=param.bijector.low.dtype, + ) + param.assign(sample) + elif param.prior is not None: + param.assign(param.prior.sample()) From 0b3aa866aee18035be415edd5edf0ed1a0485284 Mon Sep 17 00:00:00 2001 From: henrymoss Date: Thu, 15 Jul 2021 09:22:11 +0100 Subject: [PATCH 2/4] fix test --- tests/unit/models/test_model_interfaces.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/models/test_model_interfaces.py b/tests/unit/models/test_model_interfaces.py index 8b1cb7ae1d..131e31aa3d 100644 --- a/tests/unit/models/test_model_interfaces.py +++ b/tests/unit/models/test_model_interfaces.py @@ -361,7 +361,7 @@ def test_gaussian_process_regression_pairwise_covariance(gpr_interface_factory) @unittest.mock.patch( "trieste.models.model_interfaces.GaussianProcessRegression.find_best_model_initialization" ) -@pytest.mark.parametrize("d", [1, 10]) +@pytest.mark.parametrize("d", [1, 3]) @pytest.mark.parametrize("prior_for_lengthscale", [True, False]) def test_gaussian_process_regression_correctly_counts_params_that_can_be_sampled( mocked_model_initializer, d, prior_for_lengthscale, gpr_interface_factory From 8c782fd6ba933672b88e35e0ab9fd380ee77571f Mon Sep 17 00:00:00 2001 From: henrymoss Date: Thu, 15 Jul 2021 15:45:35 +0100 Subject: [PATCH 3/4] extra test --- tests/unit/models/test_model_interfaces.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/models/test_model_interfaces.py b/tests/unit/models/test_model_interfaces.py index 131e31aa3d..e8ba768f2a 100644 --- a/tests/unit/models/test_model_interfaces.py +++ b/tests/unit/models/test_model_interfaces.py @@ -754,3 +754,25 @@ def test_randomize_model_hyperparameters_randomizes_kernel_parameters_with_const npt.assert_raises(AssertionError, npt.assert_allclose, 1.0, kernel.variance) npt.assert_array_equal(dim, kernel.lengthscales.shape) npt.assert_raises(AssertionError, npt.assert_allclose, [0.2] * dim, kernel.lengthscales) + + +@random_seed +@pytest.mark.parametrize("dim", [1, 10]) +def test_randomize_model_hyperparameters_samples_from_constraints_when_given_prior_and_constraint( + dim: int, +) -> None: + kernel = gpflow.kernels.RBF(variance=1.0, lengthscales=[0.2] * dim) + upper = tf.cast([0.5] * dim, dtype=tf.float64) + + lower = upper / 100 + kernel.lengthscales = gpflow.Parameter( + kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper) + ) + kernel.lengthscales.prior = tfp.distributions.Uniform(low=10.0, high=100.0) + + kernel.variance.prior = tfp.distributions.LogNormal(loc=np.float64(-2.0), scale=np.float64(1.0)) + + randomize_model_hyperparameters(kernel) + + npt.assert_array_less(kernel.lengthscales, [0.5] * dim) + npt.assert_raises(AssertionError, npt.assert_allclose, [0.2] * dim, kernel.lengthscales) From 3f84e901451f55043de80b2d25c63613b4407cad Mon Sep 17 00:00:00 2001 From: henrymoss Date: Thu, 15 Jul 2021 15:46:03 +0100 Subject: [PATCH 4/4] done --- trieste/models/model_interfaces.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trieste/models/model_interfaces.py b/trieste/models/model_interfaces.py index 4835e978cf..48822f465a 100644 --- a/trieste/models/model_interfaces.py +++ b/trieste/models/model_interfaces.py 
@@ -436,7 +436,7 @@ def optimize(self, dataset: Dataset) -> None:
         For trainable parameters without constraints but with priors, we begin model optimization
         from the best of a random sample from these parameters' priors.
 
-        For trainable parameters with neither priors or constraints, we begin optimization from
+        For trainable parameters with neither priors nor constraints, we begin optimization from
         their initial values.
 
         :param dataset: The data with which to optimize the `model`.
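
Reviewer note: below is a minimal sketch of the sampling behaviour that patch 1 gives `randomize_model_hyperparameters`. The import path and function name are taken from the patches above; the particular kernel, bounds, and prior are illustrative only.

```python
import gpflow
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

from trieste.models.model_interfaces import randomize_model_hyperparameters

kernel = gpflow.kernels.RBF(variance=1.0, lengthscales=[0.2, 0.2])

# The lengthscales are constrained to [0.1, 10.0] by a Sigmoid bijector, so
# randomize_model_hyperparameters draws them uniformly from that interval.
lower = tf.cast([0.1, 0.1], dtype=tf.float64)
upper = tf.cast([10.0, 10.0], dtype=tf.float64)
kernel.lengthscales = gpflow.Parameter(
    kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper)
)

# The variance has a prior but no interval constraint, so it is resampled
# from the prior instead.
kernel.variance.prior = tfp.distributions.LogNormal(
    loc=np.float64(-2.0), scale=np.float64(1.0)
)

randomize_model_hyperparameters(kernel)
print(kernel.lengthscales.numpy())  # a uniform draw from [0.1, 10.0]
print(kernel.variance.numpy())  # a draw from the LogNormal prior
```

Note that the Sigmoid constraint takes precedence over a prior: as the new `test_randomize_model_hyperparameters_samples_from_constraints_when_given_prior_and_constraint` checks, a parameter with both is sampled from its constrained domain, not from its prior.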
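And a sketch of the sample-budget rule that the patched `optimize` applies before handing over to the optimizer. Here `model` is a hypothetical `GaussianProcessRegression` wrapper instance and `tf`/`tfp` are imported as above; the prior-or-Sigmoid test and the `min(1000, 100 * n)` cap mirror the diff in patch 1.

```python
# Count the total dimensions of trainable parameters that can be resampled,
# i.e. those with a prior or with a Sigmoid (finite-interval) constraint.
num_samplable = sum(
    int(tf.size(param))
    for param in model.model.trainable_parameters
    if param.prior is not None or isinstance(param.bijector, tfp.bijectors.Sigmoid)
)

if num_samplable >= 1:
    # Evaluate up to 1000 random initializations (100 per samplable dimension),
    # keeping the one with the best log likelihood plus log prior density.
    model.find_best_model_initialization(min(1000, 100 * num_samplable))
```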