Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cap the values that Beta.random can generate. #3924

Merged
merged 6 commits into from
May 15, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- End of sampling report now uses `arviz.InferenceData` internally and avoids storing
pointwise log likelihood (see [#3883](https://github.com/pymc-devs/pymc3/pull/3883)).
- The multiprocessing start method on MacOS is now set to "forkserver", to avoid crashes (see issue [#3849](https://github.com/pymc-devs/pymc3/issues/3849), solved by [#3919](https://github.com/pymc-devs/pymc3/pull/3919)).
- Forced the `Beta` distribution's `random` method to generate samples that are in the open interval $(0, 1)$, i.e. no value can be equal to zero or equal to one (issue [#3898](https://github.com/pymc-devs/pymc3/issues/3898) fixed by [#3924](https://github.com/pymc-devs/pymc3/pull/3924)).

### Deprecations
- Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
Expand Down
3 changes: 2 additions & 1 deletion pymc3/distributions/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from .dist_math import (
alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
clipped_beta_rvs,
)
from .distribution import (Continuous, draw_values, generate_samples)

Expand Down Expand Up @@ -1290,7 +1291,7 @@ def random(self, point=None, size=None):
"""
alpha, beta = draw_values([self.alpha, self.beta],
point=point, size=size)
return generate_samples(stats.beta.rvs, alpha, beta,
return generate_samples(clipped_beta_rvs, alpha, beta,
dist_shape=self.shape,
size=size)

Expand Down
47 changes: 47 additions & 0 deletions pymc3/distributions/dist_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@

f = floatX
c = - .5 * np.log(2. * np.pi)
_beta_clip_values = {
dtype: (np.nextafter(0, 1, dtype=dtype), np.nextafter(1, 0, dtype=dtype))
for dtype in ["float16", "float32", "float64", "float128"]
}



def bound(logp, *conditions, **kwargs):
Expand Down Expand Up @@ -548,3 +553,45 @@ def incomplete_beta(a, b, value):
tt.and_(tt.le(b * value, one), tt.le(value, 0.95)),
ps,
t))


def clipped_beta_rvs(a, b, size=None, dtype="float64"):
"""Draw beta distributed random samples in the open :math:`(0, 1)` interval.

The samples are generated with ``numpy.random.beta``, but any value that
is equal to 0 or 1 will be shifted towards the next floating point in the
interval :math:`[0, 1]`, depending on the floating point precision that is
given by ``dtype``.

Parameters
----------
a : float or array_like of floats
Alpha, positive (>0).
AlexAndorra marked this conversation as resolved.
Show resolved Hide resolved
b : float or array_like of floats
Beta, positive (>0).
AlexAndorra marked this conversation as resolved.
Show resolved Hide resolved
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
a single value is returned if ``a`` and ``b`` are both scalars.
Otherwise, ``np.broadcast(a, b).size`` samples are drawn.
dtype : str or dtype instance
The floating point precision that the samples should have. This also
determines the value that will be used to shift any samples returned
by the numpy random number generator that are zero or one.

Returns
-------
out : ndarray or scalar
Drawn samples from the parameterized beta distribution. The numpy
implementation can yield values that are equal to zero or one. We
assume the support of the Beta distribution to be in the open interval
:math:`(0, 1)`, so we shift any sample that is equal to 0 to
``np.nextafter(0, 1, dtype=dtype)`` and any sample that is equal to 1
if shifted to ``np.nextafter(1, 0, dtype=dtype)``.
AlexAndorra marked this conversation as resolved.
Show resolved Hide resolved

"""
out = np.random.beta(a, b, size=size).astype(dtype)
lower, upper = _beta_clip_values[dtype]
out[out == 0] = lower
out[out == 1] = upper
return out
12 changes: 11 additions & 1 deletion pymc3/tests/test_dist_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
from ..theanof import floatX
from ..distributions import Discrete
from ..distributions.dist_math import (
bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper, i0e)
bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper, i0e,
clipped_beta_rvs,
)


def test_bound():
Expand Down Expand Up @@ -216,3 +218,11 @@ def test_grad(self):
utt.verify_grad(i0e, [-2.])
utt.verify_grad(i0e, [[0.5, -2.]])
utt.verify_grad(i0e, [[[0.5, -2.]]])


@pytest.mark.parametrize("dtype", ["float16", "float32", "float64", "float128"])
def test_clipped_beta_rvs(dtype):
# Verify that the samples drawn from the beta distribution are never
# equal to zero or one (issue #3898)
values = clipped_beta_rvs(0.01, 0.01, size=1000000, dtype=dtype)
assert not (np.any(values == 0) or np.any(values == 1))