Create Ordered Multinomial distribution #4773

Merged Jul 5, 2021 (20 commits)
Changes from 10 commits
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
@@ -94,6 +94,7 @@
Multinomial,
MvNormal,
MvStudentT,
OrderedMultinomial,
Wishart,
WishartBartlett,
)
@@ -159,6 +160,7 @@
"Dirichlet",
"Multinomial",
"DirichletMultinomial",
"OrderedMultinomial",
"Wishart",
"WishartBartlett",
"LKJCholeskyCov",
68 changes: 50 additions & 18 deletions pymc3/distributions/discrete.py
@@ -27,6 +27,8 @@
)
from scipy import stats

import pymc3 as pm

from pymc3.aesaraf import floatX, intX, take_along_axis
from pymc3.distributions.dist_math import (
betaln,
@@ -59,6 +61,7 @@
"HyperGeometric",
"Categorical",
"OrderedLogistic",
"OrderedProbit",
]


@@ -1636,15 +1639,15 @@ def logcdf(value, psi, n, p):
)


class _OrderedLogistic(Categorical):
R"""
Ordered Logistic log-likelihood.

Useful for regression on ordinal data whose values range
from 1 to K as a function of some predictor, :math:`\eta`. The
cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.

.. math::
@@ -1665,10 +1668,14 @@ class OrderedLogistic(Categorical):
----------
eta: float
The predictor.
cutpoints: array
The length K - 1 array of cutpoints which break :math:`\eta` into
ranges. Do not explicitly set the first and last elements of
:math:`c` to negative and positive infinity.
compute_p: boolean, default True
Whether to compute and store in the trace the inferred probabilities of each
category, based on the cutpoints' values. Disabling this can be useful when
memory usage is a concern.

Examples
--------
@@ -1691,7 +1698,7 @@ class OrderedLogistic(Categorical):
cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
transform=pm.distributions.transforms.ordered)
y_ = pm.OrderedLogistic("y", cutpoints=cutpoints, eta=x, observed=y)
idata = pm.sample()

# Plot the results
plt.hist(cluster1, 30, alpha=0.5);
@@ -1700,7 +1707,6 @@ class OrderedLogistic(Categorical):
posterior = idata.posterior.stack(sample=("chain", "draw"))
plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k');
plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k');

"""

rv_op = categorical
@@ -1721,18 +1727,30 @@ def dist(cls, eta, cutpoints, *args, **kwargs):
)
p = p_cum[..., 1:] - p_cum[..., :-1]

return super().dist(p, *args, **kwargs)


class OrderedLogistic:
def __new__(cls, name, *args, compute_p=True, **kwargs):
out_rv = _OrderedLogistic(name, *args, **kwargs)
if compute_p:
# inputs[3] of the underlying Categorical RV is the probability vector `p`
pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3])
return out_rv

@classmethod
def dist(cls, *args, **kwargs):
return _OrderedLogistic.dist(*args, **kwargs)
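
A quick sanity check of the cumulative-sigmoid differencing used in `_OrderedLogistic.dist` above. This is a minimal NumPy sketch, not part of the diff (`ordered_logistic_probs` is a hypothetical helper):

```python
import numpy as np

def ordered_logistic_probs(eta, cutpoints):
    # sigmoid(cutpoints - eta) gives the K - 1 cumulative probabilities
    p_cum = 1.0 / (1.0 + np.exp(-(np.asarray(cutpoints) - eta)))
    # pad with 0 and 1, then difference to get the K category probabilities
    p_cum = np.concatenate([[0.0], p_cum, [1.0]])
    return np.diff(p_cum)

print(ordered_logistic_probs(0.0, [-2.0, 0.0, 2.0]))
# [0.11920292 0.38079708 0.38079708 0.11920292]
```

These are exactly the values asserted in `expected_rv_op_params` in the `TestOrderedMultinomial` test below.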


class _OrderedProbit(Categorical):
R"""
Ordered Probit log-likelihood.

Useful for regression on ordinal data whose values range
from 1 to K as a function of some predictor, :math:`\eta`. The
cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.

To stabilize the computation, the log-likelihood is computed
@@ -1754,13 +1772,16 @@ class OrderedProbit(Categorical):

Parameters
----------
eta: float
The predictor.
cutpoints: array
The length K - 1 array of cutpoints which break :math:`\eta` into
ranges. Do not explicitly set the first and last elements of
:math:`c` to negative and positive infinity.

compute_p: boolean, default True
Whether to compute and store in the trace the inferred probabilities of each
category, based on the cutpoints' values. Disabling this can be useful when
memory usage is a concern.
sigma: float
The standard deviation of the probit function.
Example
@@ -1783,7 +1804,7 @@ class OrderedProbit(Categorical):
cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
transform=pm.distributions.transforms.ordered)
y_ = pm.OrderedProbit("y", cutpoints=cutpoints, eta=x, observed=y)
idata = pm.sample()

# Plot the results
plt.hist(cluster1, 30, alpha=0.5);
@@ -1792,7 +1813,6 @@ class OrderedProbit(Categorical):
posterior = idata.posterior.stack(sample=("chain", "draw"))
plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k');
plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k');

"""

rv_op = categorical
@@ -1814,4 +1834,16 @@ def dist(cls, eta, cutpoints, *args, **kwargs):
_log_p = at.as_tensor_variable(floatX(_log_p))
p = at.exp(_log_p)

return super().dist(p, *args, **kwargs)


class OrderedProbit:
def __new__(cls, name, *args, compute_p=True, **kwargs):
out_rv = _OrderedProbit(name, *args, **kwargs)
if compute_p:
# inputs[3] of the underlying Categorical RV is the probability vector `p`
pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3])
return out_rv

@classmethod
def dist(cls, *args, **kwargs):
return _OrderedProbit.dist(*args, **kwargs)
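
The probit variant uses the same pad-and-difference construction, with the normal CDF in place of the logistic sigmoid; the actual `_OrderedProbit.dist` works in log space for numerical stability. A minimal SciPy sketch, not part of the diff (`ordered_probit_probs` is a hypothetical helper):

```python
import numpy as np
from scipy.stats import norm

def ordered_probit_probs(eta, cutpoints, sigma=1.0):
    # normal CDF of (cutpoints - eta) / sigma gives the cumulative probabilities
    p_cum = norm.cdf((np.asarray(cutpoints) - eta) / sigma)
    p_cum = np.concatenate([[0.0], p_cum, [1.0]])
    return np.diff(p_cum)

print(ordered_probit_probs(0.0, [-2.0, 0.0, 2.0]))
# [0.02275013 0.47724987 0.47724987 0.02275013]
```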
127 changes: 117 additions & 10 deletions pymc3/distributions/multivariate.py
@@ -45,14 +45,15 @@
from pymc3.distributions.continuous import ChiSquared, Normal, assert_negative_support
from pymc3.distributions.dist_math import bound, factln, logpow, multigammaln
from pymc3.distributions.distribution import Continuous, Discrete
from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker, sigmoid

__all__ = [
"MvNormal",
"MvStudentT",
"Dirichlet",
"Multinomial",
"DirichletMultinomial",
"OrderedMultinomial",
"Wishart",
"WishartBartlett",
"LKJCorr",
@@ -109,17 +110,13 @@ def quaddist_parse(value, mu, cov, mat_type="cov"):
onedim = False

delta = value - mu

if mat_type == "cov":
# Use this when Theano#5908 is released.
# return MvNormalLogp()(self.cov, delta)
chol_cov = cholesky(cov)
# Use this when Theano#5908 is released.
# return MvNormalLogp()(self.cov, delta)
chol_cov = cholesky(cov)
if mat_type == "cov" or mat_type != "tau":
AlexAndorra marked this conversation as resolved.
Show resolved Hide resolved
dist, logdet, ok = quaddist_chol(delta, chol_cov)
elif mat_type == "tau":
dist, logdet, ok = quaddist_tau(delta, chol_cov)
else:
dist, logdet, ok = quaddist_chol(delta, chol_cov)

dist, logdet, ok = quaddist_tau(delta, chol_cov)
if onedim:
return dist[0], logdet, ok
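
Note on the refactored branch above: since the default `mat_type` is `"cov"`, the condition `mat_type == "cov" or mat_type != "tau"` reduces to `mat_type != "tau"`, so every matrix type except a precision matrix takes the Cholesky-of-covariance path. A minimal sketch of the dispatch (hypothetical helper, not part of the diff):

```python
def pick_quaddist(mat_type: str) -> str:
    # only an explicit "tau" (precision matrix) takes the tau path
    return "quaddist_tau" if mat_type == "tau" else "quaddist_chol"

assert pick_quaddist("cov") == "quaddist_chol"  # the default
assert pick_quaddist("tau") == "quaddist_tau"
```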

@@ -687,6 +684,116 @@ def _distr_parameters_for_repr(self):
return ["n", "a"]


class _OrderedMultinomial(Multinomial):
R"""
Ordered Multinomial log-likelihood.

Useful for regression on ordinal data whose values range
from 1 to K as a function of some predictor, :math:`\eta`, but
which are _aggregated_ by trial, like multinomial observations (in
contrast to `pm.OrderedLogistic`, which only accepts ordinal data
in a _disaggregated_ format, like categorical observations).
The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.

.. math::

f(k \mid \eta, c) = \left\{
\begin{array}{l}
1 - \text{logit}^{-1}(\eta - c_1)
\,, \text{if } k = 0 \\
\text{logit}^{-1}(\eta - c_{k - 1}) -
\text{logit}^{-1}(\eta - c_{k})
\,, \text{if } 0 < k < K \\
\text{logit}^{-1}(\eta - c_{K - 1})
\,, \text{if } k = K \\
\end{array}
\right.

Parameters
----------
eta: float
The predictor.
cutpoints: array
The length K - 1 array of cutpoints which break :math:`\eta` into
ranges. Do not explicitly set the first and last elements of
:math:`c` to negative and positive infinity.
n: int
The total number of multinomial trials.
compute_p: boolean, default True
Whether to compute and store in the trace the inferred probabilities of each
category, based on the cutpoints' values. Disabling this can be useful when
memory usage is a concern.

Examples
--------

.. code-block:: python

# Generate data for a simple 1 dimensional example problem
true_cum_p = np.array([0.1, 0.15, 0.25, 0.50, 0.65, 0.90, 1.0])
true_p = np.hstack([true_cum_p[0], true_cum_p[1:] - true_cum_p[:-1]])
fake_elections = np.random.multinomial(n=1_000, pvals=true_p, size=60)

# Ordered multinomial regression
with pm.Model() as model:
cutpoints = pm.Normal(
"cutpoints",
mu=np.arange(6) - 2.5,
sigma=1.5,
initval=np.arange(6) - 2.5,
transform=pm.distributions.transforms.ordered,
)

pm.OrderedMultinomial(
"results",
eta=0.0,
cutpoints=cutpoints,
n=fake_elections.sum(1),
observed=fake_elections,
)

trace = pm.sample()

# Plot the results
arviz.plot_posterior(trace, var_names=["results_probs"], ref_val=list(true_p));
"""
rv_op = multinomial

@classmethod
def dist(cls, eta, cutpoints, n, *args, **kwargs):
eta = at.as_tensor_variable(floatX(eta))
cutpoints = at.as_tensor_variable(cutpoints)
n = at.as_tensor_variable(intX(n))

pa = sigmoid(cutpoints - at.shape_padright(eta))
p_cum = at.concatenate(
[
at.zeros_like(at.shape_padright(pa[..., 0])),
pa,
at.ones_like(at.shape_padright(pa[..., 0])),
],
axis=-1,
)
p = p_cum[..., 1:] - p_cum[..., :-1]

return super().dist(n, p, *args, **kwargs)


class OrderedMultinomial:
def __new__(cls, name, *args, compute_p=True, **kwargs):
out_rv = _OrderedMultinomial(name, *args, **kwargs)
if compute_p:
# inputs[4] of the underlying Multinomial RV is the probability vector `p`
pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[4])
return out_rv

@classmethod
def dist(cls, *args, **kwargs):
return _OrderedMultinomial.dist(*args, **kwargs)
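
Since `compute_p` defaults to True, the wrapper registers a Deterministic named `"<name>_probs"`, so the inferred category probabilities land directly in the trace. A short usage sketch, not part of the diff (the 3-category data is made up for illustration):

```python
import numpy as np
import pymc3 as pm

observed = np.array([20, 60, 20])  # one multinomial draw over 3 ordered categories

with pm.Model():
    cutpoints = pm.Normal(
        "cutpoints", mu=[-1.0, 1.0], sigma=10.0, shape=2,
        transform=pm.distributions.transforms.ordered,
    )
    pm.OrderedMultinomial(
        "results", eta=0.0, cutpoints=cutpoints,
        n=observed.sum(), observed=observed,
    )
    idata = pm.sample()

# Deterministic added by the wrapper, shape (chain, draw, 3):
print(idata.posterior["results_probs"])
```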


def posdef(AA):
try:
linalg.cholesky(AA)
32 changes: 32 additions & 0 deletions pymc3/tests/test_distributions.py
@@ -2898,6 +2898,38 @@ def test_orderedlogistic_dimensions(shape):
assert np.allclose(ologp, expected)


def test_ordered_multinomial_probs():
with pm.Model() as m:
pm.OrderedMultinomial("om_p", n=1000, cutpoints=np.array([-2, 0, 2]), eta=0)
pm.OrderedMultinomial(
"om_no_p", n=1000, cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False
)
assert len(m.deterministics) == 1

x = pm.OrderedMultinomial.dist(n=1000, cutpoints=np.array([-2, 0, 2]), eta=0)
assert isinstance(x, TensorVariable)


def test_ordered_logistic_probs():
with pm.Model() as m:
pm.OrderedLogistic("ol_p", cutpoints=np.array([-2, 0, 2]), eta=0)
pm.OrderedLogistic("ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False)
assert len(m.deterministics) == 1

x = pm.OrderedLogistic.dist(cutpoints=np.array([-2, 0, 2]), eta=0)
assert isinstance(x, TensorVariable)


def test_ordered_probit_probs():
with pm.Model() as m:
pm.OrderedProbit("op_p", cutpoints=np.array([-2, 0, 2]), eta=0)
pm.OrderedProbit("op_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False)
assert len(m.deterministics) == 1

x = pm.OrderedProbit.dist(cutpoints=np.array([-2, 0, 2]), eta=0)
assert isinstance(x, TensorVariable)


@pytest.mark.xfail(reason="Distribution not refactored yet")
@pytest.mark.parametrize("size", [(1,), (4,)], ids=str)
def test_car_logp(size):
12 changes: 12 additions & 0 deletions pymc3/tests/test_distributions_random.py
@@ -1306,6 +1306,18 @@ class TestOrderedProbit(BaseTestDistribution):
]


class TestOrderedMultinomial(BaseTestDistribution):
pymc_dist = pm.OrderedMultinomial
pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2]), "n": 1000}
sizes_to_check = [None, (1,), (4,), (3, 2)]
sizes_expected = [(4,), (1, 4), (4, 4), (3, 2, 4)]
expected_rv_op_params = {"p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])}
tests_to_run = [
"check_pymc_params_match_rv_op",
"check_rv_size",
]


class TestInterpolated(BaseTestDistribution):
def interpolated_rng_fn(self, size, mu, sigma, rng):
return st.norm.rvs(loc=mu, scale=sigma, size=size)