From 024dc52340660cfd6751d57b12aa4f93ca910684 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 13 Sep 2019 15:36:10 -0300
Subject: [PATCH 1/4] refactor, improve doc and scaling

---
 pymc3/smc/__init__.py   |   2 +-
 pymc3/smc/sample_smc.py | 161 ++++++++++
 pymc3/smc/smc.py        | 660 +++++++++++++++++++++++++---------------
 pymc3/smc/smc_utils.py  | 301 ------------------
 pymc3/tests/test_smc.py |  22 +-
 5 files changed, 597 insertions(+), 549 deletions(-)
 create mode 100644 pymc3/smc/sample_smc.py
 delete mode 100644 pymc3/smc/smc_utils.py

diff --git a/pymc3/smc/__init__.py b/pymc3/smc/__init__.py
index cfd6595585..4dcfb80bff 100644
--- a/pymc3/smc/__init__.py
+++ b/pymc3/smc/__init__.py
@@ -1 +1 @@
-from .smc import sample_smc
+from .sample_smc import sample_smc
diff --git a/pymc3/smc/sample_smc.py b/pymc3/smc/sample_smc.py
new file mode 100644
index 0000000000..c39a7fbf86
--- /dev/null
+++ b/pymc3/smc/sample_smc.py
@@ -0,0 +1,161 @@
+from .smc import SMC
+import logging
+
+
+def sample_smc(
+    draws=1000,
+    kernel="metropolis",
+    n_steps=25,
+    parallel=False,
+    start=None,
+    cores=None,
+    tune_steps=True,
+    p_acc_rate=0.99,
+    threshold=0.5,
+    epsilon=1.0,
+    dist_func="absolute_error",
+    sum_stat=False,
+    progressbar=False,
+    model=None,
+    random_seed=-1,
+    psis=True,
+):
+    """
+    Sequential Monte Carlo based sampling
+
+    Parameters
+    ----------
+    draws : int
+        The number of samples to draw from the posterior (i.e. the last stage). This is also
+        the number of independent chains. Defaults to 1000.
+    kernel : str
+        Kernel method for the SMC sampler. Available options are ``metropolis`` (default) and
+        ``ABC``. Use ``ABC`` for likelihood-free inference together with a ``pm.Simulator``.
+    n_steps : int
+        The number of steps of each Markov chain. If ``tune_steps == True``, ``n_steps`` will be
+        used for the first stage and for the others it will be determined automatically based on
+        the acceptance rate and ``p_acc_rate``; ``n_steps`` is then the maximum number of steps.
+    parallel : bool
+        Distribute computations across cores if the number of cores is larger than 1.
+        Defaults to False.
+    start : dict, or array of dict
+        Starting point in parameter space. It should be a list of dicts of length ``draws``.
+        When None (default) the starting point is sampled from the prior distribution.
+    cores : int
+        The number of chains to run in parallel. If ``None`` (default), it will be automatically
+        set to the number of CPUs in the system.
+    tune_steps : bool
+        Whether to compute the number of steps automatically or not. Defaults to True.
+    p_acc_rate : float
+        Used to compute ``n_steps`` when ``tune_steps == True``. The higher the value of
+        ``p_acc_rate`` the higher the number of steps computed automatically. Defaults to 0.99.
+        It should be between 0 and 1.
+    threshold : float
+        Determines the change of beta from stage to stage, i.e. indirectly the number of stages,
+        the higher the value of ``threshold`` the higher the number of stages. Defaults to 0.5.
+        It should be between 0 and 1.
+    epsilon : float
+        Standard deviation of the Gaussian pseudo-likelihood. Only works with ``kernel = ABC``.
+    dist_func : str
+        Distance function. Available options are ``absolute_error`` (default) and
+        ``sum_of_squared_distance``. Only works with ``kernel = ABC``.
+    sum_stat : bool
+        Whether or not to use summary statistics. Defaults to False. Only works with
+        ``kernel = ABC``.
+    progressbar : bool
+        Flag for displaying a progress bar. Defaults to False.
+    model : Model (optional if in ``with`` context)
+    random_seed : int
+        Random seed.
+
+    Notes
+    -----
+    SMC works by moving through successive stages. At each stage the inverse temperature
+    :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta = 0`
+    we have the prior distribution and when :math:`\beta = 1` we have the posterior
+    distribution. So in more general terms we are always computing samples from a tempered
+    posterior that we can write as:
+
+    .. math::
+
+        p(\theta \mid y)_{\beta} = p(y \mid \theta)^{\beta} p(\theta)
+
+    A summary of the algorithm is:
+
+    1. Initialize :math:`\beta` at zero and stage at zero.
+    2. Generate N samples :math:`S_{\beta}` from the prior (because when :math:`\beta = 0` the
+       tempered posterior is the prior).
+    3. Increase :math:`\beta` in order to make the effective sample size equal some predefined
+       value (we use :math:`Nt`, where :math:`t` is 0.5 by default).
+    4. Compute a set of N importance weights W. The weights are computed as the ratio of the
+       likelihoods of a sample at stage i+1 and stage i.
+    5. Obtain :math:`S_{w}` by re-sampling according to W.
+    6. Use W to compute the covariance for the proposal distribution.
+    7. For stages other than 0 use the acceptance rate from the previous stage to estimate the
+       scaling of the proposal distribution and ``n_steps``.
+    8. Run N Metropolis chains (each one of length ``n_steps``), starting each one from a
+       different sample in :math:`S_{w}`.
+    9. Repeat from step 3 until :math:`\beta \ge 1`.
+    10. The final result is a collection of N samples from the posterior.
+
+
+    References
+    ----------
+    .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013),
+        Bayesian inversion for finite fault earthquake source models I - Theory and algorithm.
+        Geophysical Journal International, 2013, 194(3), pp.1701-1726,
+        `link `__
+
+    .. [Ching2007] Ching, J. and Chen, Y. (2007).
+        Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class
+        Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816),
+        816-832.
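For orientation, a minimal usage sketch of the entry point this patch introduces; the toy Gaussian model below is illustrative only and not part of the patch:

```python
import numpy as np
import pymc3 as pm

# Toy data: any simple model works for exercising pm.sample_smc.
np.random.seed(42)
data = np.random.normal(loc=1.0, scale=1.0, size=200)

with pm.Model():
    mu = pm.Normal("mu", mu=0, sd=5)
    pm.Normal("obs", mu=mu, sd=1, observed=data)
    # draws is both the number of posterior samples and the number of SMC chains.
    trace = pm.sample_smc(draws=1000, threshold=0.5)

print(trace["mu"].mean())  # should land close to 1.0
```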
`link `__ + """ + + smc = SMC( + draws=draws, + kernel=kernel, + n_steps=n_steps, + parallel=parallel, + start=start, + cores=cores, + tune_steps=tune_steps, + p_acc_rate=p_acc_rate, + threshold=threshold, + epsilon=epsilon, + dist_func=dist_func, + sum_stat=sum_stat, + progressbar=progressbar, + model=model, + random_seed=random_seed, + ) + + _log = logging.getLogger("pymc3") + _log.info("Sample initial stage: ...") + stage = 0 + smc.initialize_population() + smc.setup_kernel() + smc.initialize_logp() + + while smc.beta < 1: + smc.update_weights_beta(psis=psis) + _log.info( + "Stage: {:3d} Beta: {:.3f} Steps: {:3d} Acce: {:.3f}".format( + stage, smc.beta, smc.n_steps, smc.acc_rate + ) + ) + smc.resample() + smc.update_proposal() + if stage > 0: + smc.tune() + smc.mutate() + stage += 1 + + if smc.parallel and smc.cores > 1: + smc.pool.close() + smc.pool.join() + + trace = smc.posterior_to_trace() + + return trace diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index a79a6ec46f..4fded6f49c 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -1,279 +1,447 @@ -""" -Sequential Monte Carlo sampler -""" +from collections import OrderedDict + import numpy as np -import pymc3 as pm from tqdm import tqdm import multiprocessing as mp import warnings +from theano import function as theano_function -from ..step_methods.metropolis import MultivariateNormalProposal -from .smc_utils import ( - _initial_population, - _calc_covariance, - _tune, - _posterior_to_trace, - logp_forw, - calc_beta, - metrop_kernel, - PseudoLikelihood, -) -from ..theanof import inputvars, make_shared_replacements -from ..model import modelcontext -from ..parallel_sampling import _cpu_count +# from arviz import psislw +from ..model import modelcontext, Point +from ..parallel_sampling import _cpu_count +from ..theanof import inputvars, make_shared_replacements +from ..vartypes import discrete_types +from ..sampling import sample_prior_predictive +from ..theanof import floatX, join_nonshared_inputs +from ..step_methods.arraystep import metrop_select +from ..step_methods.metropolis import MultivariateNormalProposal +from ..backends.ndarray import NDArray +from ..backends.base import MultiTrace +from ..util import get_untransformed_name, is_transformed_name EXPERIMENTAL_WARNING = ( "Warning: SMC-ABC methods are experimental step methods and not yet" " recommended for use in PyMC3!" ) -__all__ = ["sample_smc"] - - -def sample_smc( - draws=1000, - kernel="metropolis", - n_steps=25, - parallel=False, - start=None, - cores=None, - tune_scaling=True, - tune_steps=True, - scaling=1.0, - p_acc_rate=0.99, - threshold=0.5, - epsilon=1.0, - dist_func="absolute_error", - sum_stat=False, - progressbar=False, - model=None, - random_seed=-1, -): - """ - Sequential Monte Carlo based sampling - - Parameters - ---------- - draws : int - The number of samples to draw from the posterior (i.e. last stage). And also the number of - independent chains. Defaults to 1000. - kernel : str - Kernel method for the SMC sampler. Available option are ``metropolis`` (default) and `ABC`. - Use `ABC` for likelihood free inference togheter with a ``pm.Simulator``. - n_steps : int - The number of steps of each Markov Chain. If ``tune_steps == True`` ``n_steps`` will be used - for the first stage and for the others it will be determined automatically based on the - acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``. - parallel : bool - Distribute computations across cores if the number of cores is larger than 1. - Defaults to False. 
- start : dict, or array of dict - Starting point in parameter space. It should be a list of dict with length `chains`. - When None (default) the starting point is sampled from the prior distribution. - cores : int - The number of chains to run in parallel. If ``None`` (default), it will be automatically - set to the number of CPUs in the system. - tune_scaling : bool - Whether to compute the scaling factor automatically or not. Defaults to True - tune_steps : bool - Whether to compute the number of steps automatically or not. Defaults to True - scaling : float - Scaling factor applied to the proposal distribution i.e. the step size of the Markov Chain. - If ``tune_scaling == True`` (defaults) it will be determined automatically at each stage. - p_acc_rate : float - Used to compute ``n_steps`` when ``tune_steps == True``. The higher the value of - ``p_acc_rate`` the higher the number of steps computed automatically. Defaults to 0.99. - It should be between 0 and 1. - threshold : float - Determines the change of beta from stage to stage, i.e.indirectly the number of stages, - the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. - It should be between 0 and 1. - epsilon : float - Standard deviation of the gaussian pseudo likelihood. Only works with `kernel = ABC` - dist_func : str - Distance function. Available options are ``absolute_error`` (default) and - ``sum_of_squared_distance``. Only works with ``kernel = ABC`` - sum_stat : bool - Whether to use or not a summary statistics. Defaults to False. Only works with - ``kernel = ABC`` - progressbar : bool - Flag for displaying a progress bar. Defaults to False. - model : Model (optional if in ``with`` context)). - random_seed : int - random seed - - Notes - ----- - SMC works by moving through successive stages. At each stage the inverse temperature - :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0 - we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution. - So in more general terms we are always computing samples from a tempered posterior that we can - write as: - - .. math:: - - p(\theta \mid y)_{\beta} = p(y \mid \theta)^{\beta} p(\theta) - - A summary of the algorithm is: - - 1. Initialize :math:`\beta` at zero and stage at zero. - 2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the - tempered posterior is the prior). - 3. Increase :math:`\beta` in order to make the effective sample size equals some predefined - value (we use :math:`Nt`, where :math:`t` is 0.5 by default). - 4. Compute a set of N importance weights W. The weights are computed as the ratio of the - likelihoods of a sample at stage i+1 and stage i. - 5. Obtain :math:`S_{w}` by re-sampling according to W. - 6. Use W to compute the covariance for the proposal distribution. - 7. For stages other than 0 use the acceptance rate from the previous stage to estimate the - scaling of the proposal distribution and `n_steps`. - 8. Run N Metropolis chains (each one of length `n_steps`), starting each one from a different - sample in :math:`S_{w}`. - 9. Repeat from step 3 until :math:`\beta \ge 1`. - 10. The final result is a collection of N samples from the posterior. - - - References - ---------- - .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013), - Bayesian inversion for finite fault earthquake source models I- Theory and algorithm. - Geophysical Journal International, 2013, 194(3), pp.1701-1726, - `link `__ - - .. 
[Ching2007] Ching, J. and Chen, Y. (2007). - Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class - Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816), - 816-832. `link `__ - """ - - model = modelcontext(model) - - if random_seed != -1: - np.random.seed(random_seed) - - if cores is None: - cores = _cpu_count() - beta = 0 - stage = 0 - accepted = 0 - acc_rate = 1.0 - max_steps = n_steps - proposed = draws * n_steps - model.marginal_likelihood = 1 - variables = inputvars(model.vars) - discrete = np.concatenate([[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in variables]) - any_discrete = discrete.any() - all_discrete = discrete.all() - shared = make_shared_replacements(variables, model) - prior_logp = logp_forw([model.varlogpt], variables, shared) - - pm._log.info("Sample initial stage: ...") - - posterior, var_info = _initial_population(draws, model, variables, start) - - if kernel.lower() == "abc": - warnings.warn(EXPERIMENTAL_WARNING) - simulator = model.observed_RVs[0] - likelihood_logp = PseudoLikelihood( - epsilon, - simulator.observations, - simulator.distribution.function, - model, - var_info, - dist_func, - sum_stat, +class SMC: + def __init__( + self, + draws=1000, + kernel="metropolis", + n_steps=25, + parallel=False, + start=None, + cores=None, + tune_steps=True, + p_acc_rate=0.99, + threshold=0.5, + epsilon=1.0, + dist_func="absolute_error", + sum_stat=False, + progressbar=False, + model=None, + random_seed=-1, + ): + + self.draws = draws + self.kernel = kernel + self.n_steps = n_steps + self.parallel = parallel + self.start = start + self.cores = cores + self.tune_steps = tune_steps + self.p_acc_rate = p_acc_rate + self.threshold = threshold + self.epsilon = epsilon + self.dist_func = dist_func + self.sum_stat = sum_stat + self.progressbar = progressbar + self.model = model + self.random_seed = random_seed + + self.model = modelcontext(model) + + if self.random_seed != -1: + np.random.seed(self.random_seed) + + if self.cores is None: + self.cores = _cpu_count() + + self.beta = 0 + self.max_steps = n_steps + self.proposed = draws * n_steps + self.acc_rate = 1 + self.acc_per_chain = np.ones(self.draws) + self.model.marginal_likelihood = 1 + self.variables = inputvars(self.model.vars) + dimension = sum(v.dsize for v in self.variables) + self.scalings = np.ones(self.draws) * min(1, 2.38 ** 2 / dimension) + self.discrete = np.concatenate( + [[v.dtype in discrete_types] * (v.dsize or 1) for v in self.variables] + ) + self.any_discrete = self.discrete.any() + self.all_discrete = self.discrete.all() + + def initialize_population(self): + """ + Create an initial population from the prior distribution + """ + population = [] + var_info = OrderedDict() + if self.start is None: + init_rnd = sample_prior_predictive( + self.draws, var_names=[v.name for v in self.model.unobserved_RVs], model=self.model + ) + else: + init_rnd = self.start + + init = self.model.test_point + + for v in self.variables: + var_info[v.name] = (init[v.name].shape, init[v.name].size) + + for i in range(self.draws): + + point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model) + population.append(self.model.dict_to_array(point)) + + self.posterior = np.array(floatX(population)) + self.var_info = var_info + + def setup_kernel(self): + """ + Set up the likelihood logp function based on the chosen kernel + """ + shared = make_shared_replacements(self.variables, self.model) + self.prior_logp = 
logp_forw([self.model.varlogpt], self.variables, shared) + + if self.kernel.lower() == "abc": + warnings.warn(EXPERIMENTAL_WARNING) + simulator = self.model.observed_RVs[0] + self.likelihood_logp = PseudoLikelihood( + self.epsilon, + simulator.observations, + simulator.distribution.function, + self.model, + self.var_info, + self.dist_func, + self.sum_stat, + ) + elif self.kernel.lower() == "metropolis": + self.likelihood_logp = logp_forw([self.model.datalogpt], self.variables, shared) + + def initialize_logp(self): + """ + initialize the prior and likelihood log probabilities + """ + if self.parallel and self.cores > 1: + self.pool = mp.Pool(processes=self.cores) + priors = self.pool.starmap(self.prior_logp, [(sample,) for sample in self.posterior]) + likelihoods = self.pool.starmap( + self.likelihood_logp, [(sample,) for sample in self.posterior] + ) + else: + priors = [self.prior_logp(sample) for sample in self.posterior] + likelihoods = [self.likelihood_logp(sample) for sample in self.posterior] + + self.priors = np.array(priors).squeeze() + self.likelihoods = np.array(likelihoods).squeeze() + + def update_weights_beta(self, psis): + """ + Calculate the next inverse temperature (beta), the importance weights based on current beta + and tempered likelihood and updates the marginal likelihood estimation + """ + low_beta = old_beta = self.beta + up_beta = 2.0 + rN = int(len(self.likelihoods) * self.threshold) + + ll_diff = self.likelihoods - self.likelihoods.max() + while up_beta - low_beta > 1e-6: + new_beta = (low_beta + up_beta) / 2.0 + weights_un = np.exp((new_beta - old_beta) * ll_diff) + weights = weights_un / np.sum(weights_un) + ESS = int(1 / np.sum(weights ** 2)) + if ESS == rN: + break + elif ESS < rN: + up_beta = new_beta + else: + low_beta = new_beta + if new_beta >= 1: + new_beta = 1 + weights_un = np.exp((new_beta - old_beta) * ll_diff) + weights = weights_un / np.sum(weights_un) + + sj = np.exp((new_beta - old_beta) * self.likelihoods) + + # if psis: + # lw, ks = psislw(np.log(weights)) + # print(ks) + # if np.any(ks > 0.5): + # print('help', ks) + # weights = np.exp(lw) + + self.model.marginal_likelihood *= np.mean(sj) + self.beta = new_beta + self.weights = weights + + def resample(self): + """ + Resample particles based on importance weights + """ + resampling_indexes = np.random.choice( + np.arange(self.draws), size=self.draws, p=self.weights ) - elif kernel.lower() == "metropolis": - likelihood_logp = logp_forw([model.datalogpt], variables, shared) - - if parallel and cores > 1: - pool = mp.Pool(processes=cores) - priors = pool.starmap(prior_logp, [(sample,) for sample in posterior]) - likelihoods = pool.starmap(likelihood_logp, [(sample,) for sample in posterior]) - else: - priors = [prior_logp(sample) for sample in posterior] - likelihoods = [likelihood_logp(sample) for sample in posterior] - - priors = np.array(priors).squeeze() - likelihoods = np.array(likelihoods).squeeze() - - while beta < 1: - beta, old_beta, weights, sj = calc_beta(beta, likelihoods, threshold) - - model.marginal_likelihood *= sj - # resample based on plausibility weights (selection) - resampling_indexes = np.random.choice(np.arange(draws), size=draws, p=weights) - posterior = posterior[resampling_indexes] - priors = priors[resampling_indexes] - likelihoods = likelihoods[resampling_indexes] - - # compute proposal distribution based on weights - covariance = _calc_covariance(posterior, weights) - proposal = MultivariateNormalProposal(covariance) - - # compute scaling (optional) and number of 
Markov chains steps (optional), based on the - # acceptance rate of the previous stage - if (tune_scaling or tune_steps) and stage > 0: - scaling, n_steps = _tune( - acc_rate, proposed, tune_scaling, tune_steps, scaling, max_steps, p_acc_rate + self.posterior = self.posterior[resampling_indexes] + self.priors = self.priors[resampling_indexes] + self.likelihoods = self.likelihoods[resampling_indexes] + self.tempered_logp = self.priors + self.likelihoods * self.beta + self.acc_per_chain = self.acc_per_chain[resampling_indexes] + self.scalings = self.scalings[resampling_indexes] + + def update_proposal(self): + """ + Update proposal based on the covariance matrix from tempered posterior + """ + cov = np.cov(self.posterior, bias=False, rowvar=0) + cov = np.atleast_2d(cov) + cov += 1e-6 * np.eye(cov.shape[0]) + if np.isnan(cov).any() or np.isinf(cov).any(): + raise ValueError('Sample covariances not valid! Likely "draws" is too small!') + self.proposal = MultivariateNormalProposal(cov) + + def tune(self): + """ + Tune scaling and/or n_steps based on the acceptance rate. + """ + ave_scaling = np.exp(np.log(self.scalings.mean()) + (self.acc_per_chain.mean() - 0.234)) + self.scalings = 0.5 * ( + ave_scaling + np.exp(np.log(self.scalings) + (self.acc_per_chain - 0.234)) + ) + + if self.tune_steps: + acc_rate = max(1.0 / self.proposed, self.acc_rate) + self.n_steps = min( + self.max_steps, max(2, int(np.log(1 - self.p_acc_rate) / np.log(1 - acc_rate))) ) - pm._log.info("Stage: {:3d} Beta: {:.3f} Steps: {:3d}".format(stage, beta, n_steps)) - # Apply Metropolis kernel (mutation) - proposed = draws * n_steps - tempered_logp = priors + likelihoods * beta + self.proposed = self.draws * self.n_steps + def mutate(self): + """ + Perform mutation step, i.e. apply selected kernel + """ parameters = ( - proposal, - scaling, - accepted, - any_discrete, - all_discrete, - discrete, - n_steps, - prior_logp, - likelihood_logp, - beta, + self.proposal, + self.scalings, + self.any_discrete, + self.all_discrete, + self.discrete, + self.n_steps, + self.prior_logp, + self.likelihood_logp, + self.beta, ) - if parallel and cores > 1: - results = pool.starmap( + if self.parallel and self.cores > 1: + results = self.pool.starmap( metrop_kernel, [ ( - posterior[draw], - tempered_logp[draw], - priors[draw], - likelihoods[draw], + self.posterior[draw], + self.tempered_logp[draw], + self.priors[draw], + self.likelihoods[draw], + draw, *parameters, ) - for draw in range(draws) + for draw in range(self.draws) ], ) else: results = [ metrop_kernel( - posterior[draw], - tempered_logp[draw], - priors[draw], - likelihoods[draw], + self.posterior[draw], + self.tempered_logp[draw], + self.priors[draw], + self.likelihoods[draw], + draw, *parameters ) - for draw in tqdm(range(draws), disable=not progressbar) + for draw in tqdm(range(self.draws), disable=not self.progressbar) ] - posterior, acc_list, priors, likelihoods = zip(*results) - posterior = np.array(posterior) - priors = np.array(priors) - likelihoods = np.array(likelihoods) - acc_rate = sum(acc_list) / proposed - stage += 1 - - if parallel and cores > 1: - pool.close() - pool.join() - trace = _posterior_to_trace(posterior, variables, model, var_info) - - return trace + self.posterior = np.array(posterior) + self.priors = np.array(priors) + self.likelihoods = np.array(likelihoods) + self.acc_per_chain = np.array(acc_list) + self.acc_rate = np.mean(acc_list) + + def posterior_to_trace(self): + """ + Save results into a PyMC3 trace + """ + lenght_pos = len(self.posterior) + varnames 
= [v.name for v in self.variables] + + with self.model: + strace = NDArray(self.model) + strace.setup(lenght_pos, 0) + for i in range(lenght_pos): + value = [] + size = 0 + for var in varnames: + shape, new_size = self.var_info[var] + value.append(self.posterior[i][size : size + new_size].reshape(shape)) + size += new_size + strace.record({k: v for k, v in zip(varnames, value)}) + return MultiTrace([strace]) + + +def metrop_kernel( + q_old, + old_tempered_logp, + old_prior, + old_likelihood, + draw, + proposal, + scalings, + any_discrete, + all_discrete, + discrete, + n_steps, + prior_logp, + likelihood_logp, + beta, +): + """ + Metropolis kernel + """ + deltas = np.squeeze(proposal(n_steps) * scalings[draw]) + + accepted = 0 + for n_step in range(n_steps): + delta = deltas[n_step] + + if any_discrete: + if all_discrete: + delta = np.round(delta, 0).astype("int64") + q_old = q_old.astype("int64") + q_new = (q_old + delta).astype("int64") + else: + delta[discrete] = np.round(delta[discrete], 0) + q_new = floatX(q_old + delta) + else: + q_new = floatX(q_old + delta) + + ll = likelihood_logp(q_new) + pl = prior_logp(q_new) + + new_tempered_logp = pl + ll * beta + + q_old, accept = metrop_select(new_tempered_logp - old_tempered_logp, q_new, q_old) + + if accept: + accepted += 1 + old_prior = pl + old_likelihood = ll + old_tempered_logp = new_tempered_logp + + return q_old, accepted / n_steps, old_prior, old_likelihood + + +def logp_forw(out_vars, vars, shared): + """Compile Theano function of the model and the input and output variables. + + Parameters + ---------- + out_vars : List + containing :class:`pymc3.Distribution` for the output variables + vars : List + containing :class:`pymc3.Distribution` for the input variables + shared : List + containing :class:`theano.tensor.Tensor` for depended shared data + """ + out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared) + f = theano_function([inarray0], out_list[0]) + f.trust_input = True + return f + + +class PseudoLikelihood: + """ + Pseudo Likelihood + """ + + def __init__(self, epsilon, observations, function, model, var_info, distance, sum_stat): + """ + epsilon : float + Standard deviation of the gaussian pseudo likelihood. + observations : array-like + observed data + function : python function + data simulator + model : PyMC3 model + var_info : dict + generated by ``SMC.initialize_population`` + distance : str + Distance function. Available options are ``absolute_error`` (default) and + ``sum_of_squared_distance``. + sum_stat : bool + Whether to use or not a summary statistics. 
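+
+        Notes
+        -----
+        The distance between observed and simulated data is passed through
+        ``gauss_kernel``, which is the log-density of a zero-mean Gaussian with
+        standard deviation ``epsilon``; a smaller ``epsilon`` therefore yields a
+        sharper pseudo-likelihood.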
+ """ + self.epsilon = epsilon + self.observations = observations + self.function = function + self.model = model + self.var_info = var_info + self.kernel = self.gauss_kernel + self.dist_func = distance + self.sum_stat = sum_stat + + if distance == "absolute_error": + self.dist_func = self.absolute_error + elif distance == "sum_of_squared_distance": + self.dist_func = self.sum_of_squared_distance + else: + raise ValueError("Distance metric not understood") + + def posterior_to_function(self, posterior): + model = self.model + var_info = self.var_info + parameters = {} + size = 0 + for var, values in var_info.items(): + shape, new_size = values + value = posterior[size : size + new_size].reshape(shape) + if is_transformed_name(var): + var = get_untransformed_name(var) + value = model[var].transformation.backward_val(value) + parameters[var] = value + size += new_size + return parameters + + def gauss_kernel(self, value): + epsilon = self.epsilon + return (-(value ** 2) / epsilon ** 2 + np.log(1 / (2 * np.pi * epsilon ** 2))) / 2.0 + + def absolute_error(self, a, b): + if self.sum_stat: + return np.atleast_2d(np.abs(a.mean() - b.mean())) + else: + return np.mean(np.atleast_2d(np.abs(a - b))) + + def sum_of_squared_distance(self, a, b): + if self.sum_stat: + return np.sum(np.atleast_2d((a.mean() - b.mean()) ** 2)) + else: + return np.mean(np.sum(np.atleast_2d((a - b) ** 2))) + + def __call__(self, posterior): + func_parameters = self.posterior_to_function(posterior) + sim_data = self.function(**func_parameters) + value = self.dist_func(self.observations, sim_data) + return self.kernel(value) diff --git a/pymc3/smc/smc_utils.py b/pymc3/smc/smc_utils.py deleted file mode 100644 index e595fb9249..0000000000 --- a/pymc3/smc/smc_utils.py +++ /dev/null @@ -1,301 +0,0 @@ -""" -SMC and SMC-ABC common functions -""" -from collections import OrderedDict -import numpy as np -import pymc3 as pm -import theano -from ..step_methods.arraystep import metrop_select -from ..step_methods.metropolis import tune -from ..backends.ndarray import NDArray -from ..backends.base import MultiTrace -from ..theanof import floatX, join_nonshared_inputs -from ..util import get_untransformed_name, is_transformed_name - - -def _initial_population(draws, model, variables, start): - """ - Create an initial population from the prior - """ - - population = [] - var_info = OrderedDict() - if start is None: - init_rnd = pm.sample_prior_predictive( - draws, var_names=[v.name for v in model.unobserved_RVs], model=model - ) - else: - init_rnd = start - - init = model.test_point - - for v in variables: - var_info[v.name] = (init[v.name].shape, init[v.name].size) - - for i in range(draws): - - point = pm.Point({v.name: init_rnd[v.name][i] for v in variables}, model=model) - population.append(model.dict_to_array(point)) - - return np.array(floatX(population)), var_info - - -def _calc_covariance(posterior, weights): - """ - Calculate trace covariance matrix based on importance weights. - """ - cov = np.cov(posterior, aweights=weights.ravel(), bias=False, rowvar=0) - cov = np.atleast_2d(cov) - cov += 1e-6 * np.eye(cov.shape[0]) - if np.isnan(cov).any() or np.isinf(cov).any(): - raise ValueError('Sample covariances not valid! Likely "draws" is too small!') - return cov - - -def _tune(acc_rate, proposed, tune_scaling, tune_steps, scaling, max_steps, p_acc_rate): - """ - Tune scaling and/or n_steps based on the acceptance rate. 
- - Parameters - ---------- - acc_rate: float - Acceptance rate of the previous stage - proposed: int - Total number of proposed steps (draws * n_steps) - tune_scaling : bool - Whether to compute the scaling factor automatically or not - tune_steps : bool - Whether to compute the number of steps automatically or not - scaling : float - Scaling factor applied to the proposal distribution - max_steps : int - The maximum number of steps of each Markov Chain. - p_acc_rate : float - The higher the value of the higher the number of steps computed automatically. It should be - between 0 and 1. - """ - if tune_scaling: - scaling = tune(scaling, acc_rate) - - if tune_steps: - acc_rate = max(1.0 / proposed, acc_rate) - n_steps = min(max_steps, max(2, int(np.log(1 - p_acc_rate) / np.log(1 - acc_rate)))) - else: - n_steps = max_steps - - return scaling, n_steps - - -def _posterior_to_trace(posterior, variables, model, var_info): - """ - Save results into a PyMC3 trace - """ - lenght_pos = len(posterior) - varnames = [v.name for v in variables] - - with model: - strace = NDArray(model) - strace.setup(lenght_pos, 0) - for i in range(lenght_pos): - value = [] - size = 0 - for var in varnames: - shape, new_size = var_info[var] - value.append(posterior[i][size : size + new_size].reshape(shape)) - size += new_size - strace.record({k: v for k, v in zip(varnames, value)}) - return MultiTrace([strace]) - - -def metrop_kernel( - q_old, - old_tempered_logp, - old_prior, - old_likelihood, - proposal, - scaling, - accepted, - any_discrete, - all_discrete, - discrete, - n_steps, - prior_logp, - likelihood_logp, - beta, -): - """ - Metropolis kernel - """ - deltas = np.squeeze(proposal(n_steps) * scaling) - - for n_step in range(n_steps): - delta = deltas[n_step] - - if any_discrete: - if all_discrete: - delta = np.round(delta, 0).astype("int64") - q_old = q_old.astype("int64") - q_new = (q_old + delta).astype("int64") - else: - delta[discrete] = np.round(delta[discrete], 0) - q_new = floatX(q_old + delta) - else: - q_new = floatX(q_old + delta) - - ll = likelihood_logp(q_new) - pl = prior_logp(q_new) - - new_tempered_logp = pl + ll * beta - - q_old, accept = metrop_select(new_tempered_logp - old_tempered_logp, q_new, q_old) - if accept: - accepted += 1 - old_prior = pl - old_likelihood = ll - old_tempered_logp = new_tempered_logp - - return q_old, accepted, old_prior, old_likelihood - - -def calc_beta(beta, likelihoods, threshold=0.5, psis=True): - """ - Calculate next inverse temperature (beta) and importance weights based on current beta - and tempered likelihood. - - Parameters - ---------- - beta : float - tempering parameter of current stage - likelihoods : numpy array - likelihoods computed from the model - threshold : float - Determines the change of beta from stage to stage, i.e.indirectly the number of stages, - the higher the value of threshold the higher the number of stage. Defaults to 0.5. - It should be between 0 and 1. 
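The bisection performed by `calc_beta` here (and by `SMC.update_weights_beta` in the new class) reads well as a standalone routine; a sketch mirroring the patch's logic, run on synthetic log-likelihoods (assumed inputs, not patch code):

```python
import numpy as np

def next_beta(likelihoods, beta, threshold=0.5):
    # Bisect for the next inverse temperature so the effective sample size
    # (ESS) of the importance weights is ~ threshold * N.
    low, old_beta, up = beta, beta, 2.0
    rN = int(len(likelihoods) * threshold)
    ll_diff = likelihoods - likelihoods.max()  # subtract max for numerical stability
    while up - low > 1e-6:
        new_beta = (low + up) / 2.0
        weights_un = np.exp((new_beta - old_beta) * ll_diff)
        weights = weights_un / weights_un.sum()
        ESS = int(1 / np.sum(weights ** 2))
        if ESS == rN:
            break
        elif ESS < rN:
            up = new_beta  # weights too degenerate, temper less aggressively
        else:
            low = new_beta  # room to temper harder
    if new_beta >= 1:  # clamp the final stage to the untempered posterior
        new_beta = 1
        weights_un = np.exp((new_beta - old_beta) * ll_diff)
        weights = weights_un / weights_un.sum()
    return new_beta, weights

np.random.seed(0)
beta, w = next_beta(np.random.normal(-100, 10, size=1000), beta=0.0)
print(beta, 1 / np.sum(w ** 2))  # ESS lands near threshold * 1000 = 500
```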
- - Returns - ------- - new_beta : float - tempering parameter of the next stage - old_beta : float - tempering parameter of the current stage - weights : numpy array - Importance weights (floats) - sj : float - Partial marginal likelihood - """ - low_beta = old_beta = beta - up_beta = 2.0 - rN = int(len(likelihoods) * threshold) - - while up_beta - low_beta > 1e-6: - new_beta = (low_beta + up_beta) / 2.0 - weights_un = np.exp((new_beta - old_beta) * (likelihoods - likelihoods.max())) - weights = weights_un / np.sum(weights_un) - ESS = int(1 / np.sum(weights ** 2)) - if ESS == rN: - break - elif ESS < rN: - up_beta = new_beta - else: - low_beta = new_beta - if new_beta >= 1: - new_beta = 1 - weights_un = np.exp((new_beta - old_beta) * (likelihoods - likelihoods.max())) - weights = weights_un / np.sum(weights_un) - - sj = np.exp((new_beta - old_beta) * likelihoods) - - return new_beta, old_beta, weights, np.mean(sj) - - -def logp_forw(out_vars, vars, shared): - """Compile Theano function of the model and the input and output variables. - - Parameters - ---------- - out_vars : List - containing :class:`pymc3.Distribution` for the output variables - vars : List - containing :class:`pymc3.Distribution` for the input variables - shared : List - containing :class:`theano.tensor.Tensor` for depended shared data - """ - out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared) - f = theano.function([inarray0], out_list[0]) - f.trust_input = True - return f - - -class PseudoLikelihood: - """ - Pseudo Likelihood - """ - - def __init__( - self, - epsilon, - observations, - function, - model, - var_info, - distance="absolute_error", - sum_stat=False, - ): - """ - kernel : function - a valid scipy.stats distribution. Defaults to `stats.norm` - - """ - self.epsilon = epsilon - self.observations = observations - self.function = function - self.model = model - self.var_info = var_info - self.kernel = self.gauss_kernel - self.dist_func = distance - self.sum_stat = sum_stat - - if distance == "absolute_error": - self.dist_func = self.absolute_error - elif distance == "sum_of_squared_distance": - self.dist_func = self.sum_of_squared_distance - else: - raise ValueError("Distance metric not understood") - - def posterior_to_function(self, posterior): - model = self.model - var_info = self.var_info - parameters = {} - size = 0 - for var, values in var_info.items(): - shape, new_size = values - value = posterior[size : size + new_size].reshape(shape) - if is_transformed_name(var): - var = get_untransformed_name(var) - value = model[var].transformation.backward_val(value) - parameters[var] = value - size += new_size - return parameters - - def gauss_kernel(self, value): - epsilon = self.epsilon - return (-(value ** 2) / epsilon ** 2 + np.log(1 / (2 * np.pi * epsilon ** 2))) / 2.0 - - def absolute_error(self, a, b): - if self.sum_stat: - return np.atleast_2d(np.abs(a.mean() - b.mean())) - else: - return np.mean(np.atleast_2d(np.abs(a - b))) - - def sum_of_squared_distance(self, a, b): - if self.sum_stat: - return np.sum(np.atleast_2d((a.mean() - b.mean()) ** 2)) - else: - return np.mean(np.sum(np.atleast_2d((a - b) ** 2))) - - def __call__(self, posterior): - func_parameters = self.posterior_to_function(posterior) - sim_data = self.function(**func_parameters) - value = self.dist_func(self.observations, sim_data) - return self.kernel(value) diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py index 16b5d1fb8e..0f36c259e8 100644 --- a/pymc3/tests/test_smc.py +++ b/pymc3/tests/test_smc.py @@ -1,7 
+1,6 @@
 import pymc3 as pm
 import numpy as np
 import theano.tensor as tt
-
 from .helpers import SeededTest


@@ -80,3 +79,24 @@ def test_start(self):
             "b_log__": np.abs(np.random.normal(0, 10, size=500)),
         }
         trace = pm.sample_smc(500, start=start)
+
+
+class TestSMCABC(SeededTest):
+    def setup_class(self):
+        super().setup_class()
+        self.data = np.sort(np.random.normal(loc=0, scale=1, size=1000))
+
+        def normal_sim(a, b):
+            return np.sort(np.random.normal(a, b, 1000))
+
+        with pm.Model() as self.SMABC_test:
+            a = pm.Normal("a", mu=0, sd=5)
+            b = pm.HalfNormal("b", sd=2)
+            s = pm.Simulator("s", normal_sim, observed=self.data)
+
+    def test_one_gaussian(self):
+        with self.SMABC_test:
+            trace = pm.sample_smc(draws=2000, kernel="ABC", epsilon=0.1)
+
+        np.testing.assert_almost_equal(self.data.mean(), trace["a"].mean(), decimal=2)
+        np.testing.assert_almost_equal(self.data.std(), trace["b"].mean(), decimal=1)

From 8e3464e710b9901e9bb3bfb72f8bf59cbe039007 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 13 Sep 2019 16:33:34 -0300
Subject: [PATCH 2/4] add release note

---
 RELEASE-NOTES.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 73f8a4d93f..fb2b12fa18 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -18,9 +18,10 @@
 - Fixed a bug in `Categorical.logp`. In the case of multidimensional `p`'s, the indexing was done wrong leading to incorrectly shaped tensors that consumed `O(n**2)` memory instead of `O(n)`. This fixes issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535)
 - Fixed a defect in `OrderedLogistic.__init__` that unnecessarily increased the dimensionality of the underlying `p`. Related to issue issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535) but was not the true cause of it.
 - SMC: stabilize covariance matrix [3573](https://github.com/pymc-devs/pymc3/pull/3573)
-- SMC is no longer a step method of `pm.sample` now it should be called using `pm.sample_smc` [3579](https://github.com/pymc-devs/pymc3/pull/3579)
-- SMC: improve computation of the proposal scaling factor [3594](https://github.com/pymc-devs/pymc3/pull/3594)
+- SMC: is no longer a step method of `pm.sample`; it should now be called using `pm.sample_smc` [3579](https://github.com/pymc-devs/pymc3/pull/3579)
+- SMC: improve computation of the proposal scaling factor [3594](https://github.com/pymc-devs/pymc3/pull/3594) and [3625](https://github.com/pymc-devs/pymc3/pull/3625)
 - SMC: reduce number of logp evaluations [3600](https://github.com/pymc-devs/pymc3/pull/3600)
+- SMC: remove `scaling` and `tune_scaling` arguments, as it is better to always let SMC compute the scaling factor automatically [3625](https://github.com/pymc-devs/pymc3/pull/3625)
 - Now uses `multiprocessong` rather than `psutil` to count CPUs, which results in reliable core counts on Chromebooks.
 - `sample_posterior_predictive` now preallocates the memory required for its output to improve memory usage. Addresses problems raised in this [discourse thread](https://discourse.pymc.io/t/memory-error-with-posterior-predictive-sample/2891/4).
 - Fixed a bug in `Categorical.logp`. In the case of multidimensional `p`'s, the indexing was done wrong leading to incorrectly shaped tensors that consumed `O(n**2)` memory instead of `O(n)`.
This fixes issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535) From 0bf373b784d38b475fb09ba2c29966fef21e81fe Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Wed, 25 Sep 2019 16:15:00 -0300 Subject: [PATCH 3/4] remove psis commented code --- pymc3/smc/smc.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index 4fded6f49c..efd2061537 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -6,8 +6,6 @@ import warnings from theano import function as theano_function -# from arviz import psislw - from ..model import modelcontext, Point from ..parallel_sampling import _cpu_count from ..theanof import inputvars, make_shared_replacements @@ -178,13 +176,6 @@ def update_weights_beta(self, psis): sj = np.exp((new_beta - old_beta) * self.likelihoods) - # if psis: - # lw, ks = psislw(np.log(weights)) - # print(ks) - # if np.any(ks > 0.5): - # print('help', ks) - # weights = np.exp(lw) - self.model.marginal_likelihood *= np.mean(sj) self.beta = new_beta self.weights = weights From 146f8978e16cec827ec9b6f3b370d9d309e61ba3 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Thu, 3 Oct 2019 08:46:44 -0300 Subject: [PATCH 4/4] remove psis argument, add warning, fix typo --- pymc3/smc/sample_smc.py | 3 +-- pymc3/smc/smc.py | 6 ++++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pymc3/smc/sample_smc.py b/pymc3/smc/sample_smc.py index c39a7fbf86..aeee7d6609 100644 --- a/pymc3/smc/sample_smc.py +++ b/pymc3/smc/sample_smc.py @@ -18,7 +18,6 @@ def sample_smc( progressbar=False, model=None, random_seed=-1, - psis=True, ): """ Sequential Monte Carlo based sampling @@ -139,7 +138,7 @@ def sample_smc( smc.initialize_logp() while smc.beta < 1: - smc.update_weights_beta(psis=psis) + smc.update_weights_beta() _log.info( "Stage: {:3d} Beta: {:.3f} Steps: {:3d} Acce: {:.3f}".format( stage, smc.beta, smc.n_steps, smc.acc_rate diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index efd2061537..d9abbcd4f6 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -118,6 +118,8 @@ def setup_kernel(self): if self.kernel.lower() == "abc": warnings.warn(EXPERIMENTAL_WARNING) + if len(self.model.observed_RVs) != 1: + warnings.warn("SMC-ABC only works properly with models with one observed variable") simulator = self.model.observed_RVs[0] self.likelihood_logp = PseudoLikelihood( self.epsilon, @@ -148,7 +150,7 @@ def initialize_logp(self): self.priors = np.array(priors).squeeze() self.likelihoods = np.array(likelihoods).squeeze() - def update_weights_beta(self, psis): + def update_weights_beta(self): """ Calculate the next inverse temperature (beta), the importance weights based on current beta and tempered likelihood and updates the marginal likelihood estimation @@ -207,7 +209,7 @@ def update_proposal(self): def tune(self): """ - Tune scaling and/or n_steps based on the acceptance rate. + Tune scaling and n_steps based on the acceptance rate. """ ave_scaling = np.exp(np.log(self.scalings.mean()) + (self.acc_per_chain.mean() - 0.234)) self.scalings = 0.5 * (