Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add .random method to MvGaussianRandomWalk (Issue #4337) #4388

Merged
merged 15 commits into from
Jan 3, 2021
Merged
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang
- `OrderedProbit` distribution added (see [#4232](https://github.com/pymc-devs/pymc3/pull/4232)).
- `plot_posterior_predictive_glm` now works with `arviz.InferenceData` as well (see [#4234](https://github.com/pymc-devs/pymc3/pull/4234)).
- Add `logcdf` method to all univariate discrete distributions (see [#4387](https://github.com/pymc-devs/pymc3/pull/4387)).
- Add `random` method to `MvGaussianRandomWalk` (see [#4388](https://github.com/pymc-devs/pymc3/pull/4388)).

### Maintenance
- Fixed bug whereby partial traces returned after a keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318)
Expand Down
73 changes: 71 additions & 2 deletions pymc3/distributions/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,6 @@ def _random(self, sigma, mu, size, sample_shape):
"""Implement a Gaussian random walk as a cumulative sum of normals.
axis = len(size) - 1 denotes the axis along which cumulative sum would be calculated.
This might need to be corrected in future when issue #4010 is fixed.
Lines 318-322 ties the starting point of each instance of random walk to 0"
"""
if size[len(sample_shape)] == sample_shape:
axis = len(sample_shape)
Expand All @@ -284,6 +283,8 @@ def _random(self, sigma, mu, size, sample_shape):
rv = stats.norm(mu, sigma)
data = rv.rvs(size).cumsum(axis=axis)
data = np.array(data)

# the following lines center the random walk to start at the origin.
if len(data.shape) > 1:
for i in range(data.shape[0]):
data[i] = data[i] - data[i][0]
Expand Down Expand Up @@ -435,7 +436,7 @@ def __init__(

self.init = init
self.innovArgs = (mu, cov, tau, chol, lower)
self.innov = multivariate.MvNormal.dist(*self.innovArgs)
self.innov = multivariate.MvNormal.dist(*self.innovArgs, shape=self.shape)
self.mean = tt.as_tensor_variable(0.0)

def logp(self, x):
Expand All @@ -452,6 +453,10 @@ def logp(self, x):
-------
TensorVariable
"""

if x.ndim == 1:
x = x[np.newaxis, :]

x_im1 = x[:-1]
x_i = x[1:]

Expand All @@ -460,6 +465,70 @@ def logp(self, x):
def _distr_parameters_for_repr(self):
return ["mu", "cov"]

def random(self, point=None, size=None):
"""
Draw random values from MvGaussianRandomWalk.

Parameters
----------
point: dict, optional
Dict of variable values on which random values are to be
conditioned (uses default point if not specified).
size: int or tuple of ints, optional
Desired size of random sample (returns one sample if not
specified).

Returns
-------
array


Examples
-------
.. code-block:: python

with pm.Model():
mu = np.array([1.0, 0.0])
cov = np.array([[1.0, 0.0],
[0.0, 2.0]])

# draw one sample from a 2-dimensional Gaussian random walk with 10 timesteps
sample = MvGaussianRandomWalk(mu, cov, shape=(10, 2)).random()

# draw three samples from a 2-dimensional Gaussian random walk with 10 timesteps
sample = MvGaussianRandomWalk(mu, cov, shape=(10, 2)).random(size=3)

# draw four samples from a 2-dimensional Gaussian random walk with 10 timesteps,
# indexed with a (2, 2) array
sample = MvGaussianRandomWalk(mu, cov, shape=(10, 2)).random(size=(2, 2))
"""

# for each draw specified by the size input, we need to draw time_steps many
# samples from MvNormal.

size = to_tuple(size)
multivariate_samples = self.innov.random(point=point, size=size)
# this has shape (size, self.shape)
if len(self.shape) == 2:
# have time dimension in first slot of shape. Therefore the time
# component can be accessed with the index equal to the length of size.
time_axis = len(size)
multivariate_samples = multivariate_samples.cumsum(axis=time_axis)
if time_axis != 0:
# this for loop covers the case where size is a tuple
for idx in np.ndindex(size):
multivariate_samples[idx] = (
multivariate_samples[idx] - multivariate_samples[idx][0]
)
else:
# size was passed as None
multivariate_samples = multivariate_samples - multivariate_samples[0]

# if the above statement fails, then only a spatial dimension was passed in for self.shape.
# Therefore don't subtract off the initial value since otherwise you get all zeros
# as your output.
return multivariate_samples


class MvStudentTRandomWalk(MvGaussianRandomWalk):
r"""
Expand Down
50 changes: 50 additions & 0 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -1720,3 +1720,53 @@ def test_matrix_normal_random_with_random_variables():
prior = pm.sample_prior_predictive(2)

assert prior["mu"].shape == (2, D, K)


class TestMvGaussianRandomWalk(SeededTest):
    # Shape tests for MvGaussianRandomWalk.random(): each case checks that
    # drawing `sample_shape` samples from a distribution of shape
    # `dist_shape` yields an array of shape to_tuple(sample_shape) + dist_shape.

    @pytest.mark.parametrize(
        ["sample_shape", "dist_shape", "mu_shape", "param"],
        generate_shapes(include_params=True),
        ids=str,
    )
    def test_with_np_arrays(self, sample_shape, dist_shape, mu_shape, param):
        # Concrete numpy parameters only; `param` selects which scale
        # parameterization (e.g. cov/chol/tau) receives the identity matrix.
        dist = pm.MvGaussianRandomWalk.dist(
            mu=np.ones(mu_shape), **{param: np.eye(3)}, shape=dist_shape
        )
        output_shape = to_tuple(sample_shape) + dist_shape
        assert dist.random(size=sample_shape).shape == output_shape

    # NOTE(review): marked xfail — presumably prior-predictive sampling with
    # random-variable-valued scale parameters is not yet supported; confirm
    # against the tracking issue before removing the marker.
    @pytest.mark.xfail
    @pytest.mark.parametrize(
        ["sample_shape", "dist_shape", "mu_shape"],
        generate_shapes(include_params=False),
        ids=str,
    )
    def test_with_chol_rv(self, sample_shape, dist_shape, mu_shape):
        # Cholesky factor is itself a random variable (LKJCholeskyCov prior).
        with pm.Model() as model:
            mu = pm.Normal("mu", 0.0, 1.0, shape=mu_shape)
            sd_dist = pm.Exponential.dist(1.0, shape=3)
            chol, corr, stds = pm.LKJCholeskyCov(
                "chol_cov", n=3, eta=2, sd_dist=sd_dist, compute_corr=True
            )
            mv = pm.MvGaussianRandomWalk("mv", mu, chol=chol, shape=dist_shape)
            prior = pm.sample_prior_predictive(samples=sample_shape)

        assert prior["mv"].shape == to_tuple(sample_shape) + dist_shape

    # NOTE(review): marked xfail for the same (presumed) reason as
    # test_with_chol_rv — RV-valued covariance; confirm.
    @pytest.mark.xfail
    @pytest.mark.parametrize(
        ["sample_shape", "dist_shape", "mu_shape"],
        generate_shapes(include_params=False),
        ids=str,
    )
    def test_with_cov_rv(self, sample_shape, dist_shape, mu_shape):
        # Covariance is built from a random Cholesky factor (chol @ chol.T).
        with pm.Model() as model:
            mu = pm.Normal("mu", 0.0, 1.0, shape=mu_shape)
            sd_dist = pm.Exponential.dist(1.0, shape=3)
            chol, corr, stds = pm.LKJCholeskyCov(
                "chol_cov", n=3, eta=2, sd_dist=sd_dist, compute_corr=True
            )
            mv = pm.MvGaussianRandomWalk("mv", mu, cov=pm.math.dot(chol, chol.T), shape=dist_shape)
            prior = pm.sample_prior_predictive(samples=sample_shape)

        assert prior["mv"].shape == to_tuple(sample_shape) + dist_shape