Create Ordered Multinomial distribution #4773

Merged Jul 5, 2021 (20 commits)
Changes from 10 commits
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
@@ -94,6 +94,7 @@
Multinomial,
MvNormal,
MvStudentT,
OrderedMultinomial,
Wishart,
WishartBartlett,
)
@@ -159,6 +160,7 @@
"Dirichlet",
"Multinomial",
"DirichletMultinomial",
"OrderedMultinomial",
"Wishart",
"WishartBartlett",
"LKJCholeskyCov",
68 changes: 50 additions & 18 deletions pymc3/distributions/discrete.py
@@ -27,6 +27,8 @@
)
from scipy import stats

import pymc3 as pm

from pymc3.aesaraf import floatX, intX, take_along_axis
from pymc3.distributions.dist_math import (
betaln,
@@ -59,6 +61,7 @@
"HyperGeometric",
"Categorical",
"OrderedLogistic",
"OrderedProbit",
]


@@ -1636,15 +1639,15 @@ def logcdf(value, psi, n, p):
)


class _OrderedLogistic(Categorical):
R"""
Ordered Logistic log-likelihood.

Useful for regression on ordinal data whose values range
from 1 to K as a function of some predictor, :math:`\eta`. The
cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.

.. math::
@@ -1665,10 +1668,14 @@ class OrderedLogistic(Categorical):
----------
eta: float
The predictor.
cutpoints: array
The length K - 1 array of cutpoints which break :math:`\eta` into
ranges. Do not explicitly set the first and last elements of
:math:`c` to negative and positive infinity.
compute_p: boolean, default True
Whether to compute and store in the trace the inferred probabilities of each
category, based on the cutpoints' values. Disabling this can be useful when
memory usage is a concern.

Examples
--------
@@ -1691,7 +1698,7 @@ class OrderedLogistic(Categorical):
cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
transform=pm.distributions.transforms.ordered)
y_ = pm.OrderedLogistic("y", cutpoints=cutpoints, eta=x, observed=y)
idata = pm.sample()

# Plot the results
plt.hist(cluster1, 30, alpha=0.5);
@@ -1700,7 +1707,6 @@ class OrderedLogistic(Categorical):
posterior = idata.posterior.stack(sample=("chain", "draw"))
plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k');
plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k');

"""

rv_op = categorical
@@ -1721,18 +1727,30 @@ def dist(cls, eta, cutpoints, *args, **kwargs):
)
p = p_cum[..., 1:] - p_cum[..., :-1]

return super().dist(p, *args, **kwargs)


class OrderedLogistic:
def __new__(cls, name, *args, compute_p=True, **kwargs):
out_rv = _OrderedLogistic(name, *args, **kwargs)
if compute_p:
# inputs[3] of the underlying Categorical RV is the probability vector `p`
pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3])
return out_rv

@classmethod
def dist(cls, *args, **kwargs):
return _OrderedLogistic.dist(*args, **kwargs)
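
A quick sanity check of the cumulative-sigmoid differencing used in `_OrderedLogistic.dist` above. This is a minimal NumPy sketch, not part of the diff (`ordered_logistic_probs` is a hypothetical helper):

```python
import numpy as np

def ordered_logistic_probs(eta, cutpoints):
    # sigmoid(cutpoints - eta) gives the K - 1 cumulative probabilities
    p_cum = 1.0 / (1.0 + np.exp(-(np.asarray(cutpoints) - eta)))
    # pad with 0 and 1, then difference to get the K category probabilities
    p_cum = np.concatenate([[0.0], p_cum, [1.0]])
    return np.diff(p_cum)

print(ordered_logistic_probs(0.0, [-2.0, 0.0, 2.0]))
# [0.11920292 0.38079708 0.38079708 0.11920292]
```

These are exactly the values asserted in `expected_rv_op_params` in the `TestOrderedMultinomial` test below.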


class _OrderedProbit(Categorical):
R"""
Ordered Probit log-likelihood.

Useful for regression on ordinal data whose values range
from 1 to K as a function of some predictor, :math:`\eta`. The
cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.

To stabilize the computation, the log-likelihood is computed
@@ -1754,13 +1772,16 @@ class OrderedProbit(Categorical):

Parameters
----------
eta: float
The predictor.
cutpoints: array
The length K - 1 array of cutpoints which break :math:`\eta` into
ranges. Do not explicitly set the first and last elements of
:math:`c` to negative and positive infinity.

compute_p: boolean, default True
Whether to compute and store in the trace the inferred probabilities of each
category, based on the cutpoints' values. Disabling this can be useful when
memory usage is a concern.
sigma: float
The standard deviation of the probit function.
Example
@@ -1783,7 +1804,7 @@ class OrderedProbit(Categorical):
cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
transform=pm.distributions.transforms.ordered)
y_ = pm.OrderedProbit("y", cutpoints=cutpoints, eta=x, observed=y)
idata = pm.sample()

# Plot the results
plt.hist(cluster1, 30, alpha=0.5);
@@ -1792,7 +1813,6 @@ class OrderedProbit(Categorical):
posterior = idata.posterior.stack(sample=("chain", "draw"))
plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k');
plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k');

"""

rv_op = categorical
@@ -1814,4 +1834,16 @@ def dist(cls, eta, cutpoints, *args, **kwargs):
_log_p = at.as_tensor_variable(floatX(_log_p))
p = at.exp(_log_p)

return super().dist(p, *args, **kwargs)


class OrderedProbit:
def __new__(cls, name, *args, compute_p=True, **kwargs):
out_rv = _OrderedProbit(name, *args, **kwargs)
if compute_p:
# inputs[3] of the underlying Categorical RV is the probability vector `p`
pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3])
return out_rv

@classmethod
def dist(cls, *args, **kwargs):
return _OrderedProbit.dist(*args, **kwargs)
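
The probit variant uses the same pad-and-difference construction, with the normal CDF in place of the logistic sigmoid; the actual `_OrderedProbit.dist` works in log space for numerical stability. A minimal SciPy sketch, not part of the diff (`ordered_probit_probs` is a hypothetical helper):

```python
import numpy as np
from scipy.stats import norm

def ordered_probit_probs(eta, cutpoints, sigma=1.0):
    # normal CDF of (cutpoints - eta) / sigma gives the cumulative probabilities
    p_cum = norm.cdf((np.asarray(cutpoints) - eta) / sigma)
    p_cum = np.concatenate([[0.0], p_cum, [1.0]])
    return np.diff(p_cum)

print(ordered_probit_probs(0.0, [-2.0, 0.0, 2.0]))
# [0.02275013 0.47724987 0.47724987 0.02275013]
```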
127 changes: 117 additions & 10 deletions pymc3/distributions/multivariate.py
@@ -45,14 +45,15 @@
from pymc3.distributions.continuous import ChiSquared, Normal, assert_negative_support
from pymc3.distributions.dist_math import bound, factln, logpow, multigammaln
from pymc3.distributions.distribution import Continuous, Discrete
from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker, sigmoid

__all__ = [
"MvNormal",
"MvStudentT",
"Dirichlet",
"Multinomial",
"DirichletMultinomial",
"OrderedMultinomial",
"Wishart",
"WishartBartlett",
"LKJCorr",
@@ -109,17 +110,13 @@ def quaddist_parse(value, mu, cov, mat_type="cov"):
onedim = False

delta = value - mu

if mat_type == "cov":
# Use this when Theano#5908 is released.
# return MvNormalLogp()(self.cov, delta)
chol_cov = cholesky(cov)
# Use this when Theano#5908 is released.
# return MvNormalLogp()(self.cov, delta)
chol_cov = cholesky(cov)
if mat_type == "cov" or mat_type != "tau":
AlexAndorra marked this conversation as resolved.
Show resolved Hide resolved
dist, logdet, ok = quaddist_chol(delta, chol_cov)
elif mat_type == "tau":
dist, logdet, ok = quaddist_tau(delta, chol_cov)
else:
dist, logdet, ok = quaddist_chol(delta, chol_cov)

dist, logdet, ok = quaddist_tau(delta, chol_cov)
if onedim:
return dist[0], logdet, ok
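
Note on the refactored branch above: since the default `mat_type` is `"cov"`, the condition `mat_type == "cov" or mat_type != "tau"` reduces to `mat_type != "tau"`, so every matrix type except a precision matrix takes the Cholesky-of-covariance path. A minimal sketch of the dispatch (hypothetical helper, not part of the diff):

```python
def pick_quaddist(mat_type: str) -> str:
    # only an explicit "tau" (precision matrix) takes the tau path
    return "quaddist_tau" if mat_type == "tau" else "quaddist_chol"

assert pick_quaddist("cov") == "quaddist_chol"  # the default
assert pick_quaddist("tau") == "quaddist_tau"
```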

@@ -687,6 +684,116 @@ def _distr_parameters_for_repr(self):
return ["n", "a"]


class _OrderedMultinomial(Multinomial):
R"""
Ordered Multinomial log-likelihood.

Useful for regression on ordinal data whose values range
from 1 to K as a function of some predictor, :math:`\eta`, but
which are _aggregated_ by trial, like multinomial observations (in
contrast to `pm.OrderedLogistic`, which only accepts ordinal data
in a _disaggregated_ format, like categorical observations).
The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.

.. math::

f(k \mid \eta, c) = \left\{
\begin{array}{l}
1 - \text{logit}^{-1}(\eta - c_1)
\,, \text{if } k = 0 \\
\text{logit}^{-1}(\eta - c_{k - 1}) -
\text{logit}^{-1}(\eta - c_{k})
\,, \text{if } 0 < k < K \\
\text{logit}^{-1}(\eta - c_{K - 1})
\,, \text{if } k = K \\
\end{array}
\right.

Parameters
----------
eta: float
The predictor.
cutpoints: array
The length K - 1 array of cutpoints which break :math:`\eta` into
ranges. Do not explicitly set the first and last elements of
:math:`c` to negative and positive infinity.
n: int
The total number of multinomial trials.
compute_p: boolean, default True
Whether to compute and store in the trace the inferred probabilities of each
category, based on the cutpoints' values. Disabling this can be useful when
memory usage is a concern.

Examples
--------

.. code-block:: python

# Generate data for a simple 1 dimensional example problem
true_cum_p = np.array([0.1, 0.15, 0.25, 0.50, 0.65, 0.90, 1.0])
true_p = np.hstack([true_cum_p[0], true_cum_p[1:] - true_cum_p[:-1]])
fake_elections = np.random.multinomial(n=1_000, pvals=true_p, size=60)

# Ordered multinomial regression
with pm.Model() as model:
cutpoints = pm.Normal(
"cutpoints",
mu=np.arange(6) - 2.5,
sigma=1.5,
initval=np.arange(6) - 2.5,
transform=pm.distributions.transforms.ordered,
)

pm.OrderedMultinomial(
"results",
eta=0.0,
cutpoints=cutpoints,
n=fake_elections.sum(1),
observed=fake_elections,
)

trace = pm.sample()

# Plot the results
arviz.plot_posterior(trace, var_names=["results_probs"], ref_val=list(true_p));
"""
rv_op = multinomial

@classmethod
def dist(cls, eta, cutpoints, n, *args, **kwargs):
eta = at.as_tensor_variable(floatX(eta))
cutpoints = at.as_tensor_variable(cutpoints)
n = at.as_tensor_variable(intX(n))

pa = sigmoid(cutpoints - at.shape_padright(eta))
p_cum = at.concatenate(
[
at.zeros_like(at.shape_padright(pa[..., 0])),
pa,
at.ones_like(at.shape_padright(pa[..., 0])),
],
axis=-1,
)
p = p_cum[..., 1:] - p_cum[..., :-1]

return super().dist(n, p, *args, **kwargs)


class OrderedMultinomial:
def __new__(cls, name, *args, compute_p=True, **kwargs):
out_rv = _OrderedMultinomial(name, *args, **kwargs)
if compute_p:
# inputs[4] of the underlying Multinomial RV is the probability vector `p`
pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[4])
return out_rv

@classmethod
def dist(cls, *args, **kwargs):
return _OrderedMultinomial.dist(*args, **kwargs)
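
Since `compute_p` defaults to True, the wrapper registers a Deterministic named `"<name>_probs"`, so the inferred category probabilities land directly in the trace. A short usage sketch, not part of the diff (the 3-category data is made up for illustration):

```python
import numpy as np
import pymc3 as pm

observed = np.array([20, 60, 20])  # one multinomial draw over 3 ordered categories

with pm.Model():
    cutpoints = pm.Normal(
        "cutpoints", mu=[-1.0, 1.0], sigma=10.0, shape=2,
        transform=pm.distributions.transforms.ordered,
    )
    pm.OrderedMultinomial(
        "results", eta=0.0, cutpoints=cutpoints,
        n=observed.sum(), observed=observed,
    )
    idata = pm.sample()

# Deterministic added by the wrapper, shape (chain, draw, 3):
print(idata.posterior["results_probs"])
```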


def posdef(AA):
try:
linalg.cholesky(AA)
32 changes: 32 additions & 0 deletions pymc3/tests/test_distributions.py
@@ -2898,6 +2898,38 @@ def test_orderedlogistic_dimensions(shape):
assert np.allclose(ologp, expected)


def test_ordered_multinomial_probs():
with pm.Model() as m:
pm.OrderedMultinomial("om_p", n=1000, cutpoints=np.array([-2, 0, 2]), eta=0)
pm.OrderedMultinomial(
"om_no_p", n=1000, cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False
)
assert len(m.deterministics) == 1

x = pm.OrderedMultinomial.dist(n=1000, cutpoints=np.array([-2, 0, 2]), eta=0)
assert isinstance(x, TensorVariable)


def test_ordered_logistic_probs():
with pm.Model() as m:
pm.OrderedLogistic("ol_p", cutpoints=np.array([-2, 0, 2]), eta=0)
pm.OrderedLogistic("ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False)
assert len(m.deterministics) == 1

x = pm.OrderedLogistic.dist(cutpoints=np.array([-2, 0, 2]), eta=0)
assert isinstance(x, TensorVariable)


def test_ordered_probit_probs():
with pm.Model() as m:
pm.OrderedProbit("op_p", cutpoints=np.array([-2, 0, 2]), eta=0)
pm.OrderedProbit("op_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False)
assert len(m.deterministics) == 1

x = pm.OrderedProbit.dist(cutpoints=np.array([-2, 0, 2]), eta=0)
assert isinstance(x, TensorVariable)


@pytest.mark.xfail(reason="Distribution not refactored yet")
@pytest.mark.parametrize("size", [(1,), (4,)], ids=str)
def test_car_logp(size):
12 changes: 12 additions & 0 deletions pymc3/tests/test_distributions_random.py
@@ -1306,6 +1306,18 @@ class TestOrderedProbit(BaseTestDistribution):
]


class TestOrderedMultinomial(BaseTestDistribution):
pymc_dist = pm.OrderedMultinomial
pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2]), "n": 1000}
sizes_to_check = [None, (1,), (4,), (3, 2)]
sizes_expected = [(4,), (1, 4), (4, 4), (3, 2, 4)]
expected_rv_op_params = {"p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])}
tests_to_run = [
"check_pymc_params_match_rv_op",
"check_rv_size",
]


class TestInterpolated(BaseTestDistribution):
def interpolated_rng_fn(self, size, mu, sigma, rng):
return st.norm.rvs(loc=mu, scale=sigma, size=size)