Introduce beartype & fix types #230

Merged · 91 commits · Apr 27, 2023
Changes shown are from 69 of the 91 commits.

Commits
15b808f  add beartype dependency (st--, Apr 13, 2023)
0eaec30  from typing import -> from beartype.typing import (st--, Apr 13, 2023)
781bc38  jaxtyping import_hook for @jaxtyped @beartype everywhere (st--, Apr 13, 2023)
67e315f  fix Type[] of class-as-argument (st--, Apr 13, 2023)
e77732b  fix KeyArray type hint (should probably move into jaxutils though) (st--, Apr 13, 2023)
5175fa7  fix return value of slice_input when active_dims is None (st--, Apr 13, 2023)
c548d73  fix return value of squared_distance (st--, Apr 13, 2023)
079936f  fix return type of recursive_bijectors (st--, Apr 13, 2023)
1005212  fix slice_input type annotations (st--, Apr 13, 2023)
0c57a4e  new KernelCallable type to fix kernel_fn annotations (st--, Apr 13, 2023)
777b042  fix kernel __call__ annotation (st--, Apr 13, 2023)
df88c69  fix KeyArray type hint (st--, Apr 13, 2023)
711fcac  beartype does not like forward references; replaced with string types (st--, Apr 13, 2023)
1c95fa5  linops other type hint fixes (st--, Apr 13, 2023)
1b915cf  fix KeyArray (st--, Apr 13, 2023)
c3e48c1  abstractions.py some type fixes (st--, Apr 13, 2023)
4cc9db1  fix GaussianDistribution.log_prob return type (st--, Apr 13, 2023)
4c4a572  fix depreciations & warnings (st--, Apr 13, 2023)
c512195  Merge branch 'st/fix_depreciations' into st/beartype (st--, Apr 13, 2023)
9516fdf  fix scalar array types (st--, Apr 13, 2023)
2d45258  introduce ScalarBool, ScalarInt for jitted calls in abstractions (st--, Apr 13, 2023)
ecfde64  relax LinearOperator's solve() types (can be both matrix or vector), … (st--, Apr 13, 2023)
066a5d7  remove _stop_grad type hints, not sure what they should be (st--, Apr 13, 2023)
ff68725  found some more (st--, Apr 13, 2023)
4716773  Merge branch 'st/fix_depreciations' into st/beartype (st--, Apr 13, 2023)
5c90755  float -> ScalarFloat fixes (st--, Apr 13, 2023)
c692568  linops log_det type fixes (st--, Apr 13, 2023)
ff5211a  some more linops type fixes (st--, Apr 13, 2023)
f3cc17e  Merge remote-tracking branch 'upstream/v0.6' into st/beartype (st--, Apr 13, 2023)
af69a73  actually commit KeyArray and Scalar* types (st--, Apr 17, 2023)
cc97fe2  add beartype to pyproject (st--, Apr 17, 2023)
97d6fdd  from beartype.typing import ... (st--, Apr 17, 2023)
cd7c2ee  try to fix Self in gpjax/base/module (st--, Apr 17, 2023)
8a62aae  fix _check_shape (st--, Apr 17, 2023)
9359b05  gpjax.objectives: always import from gps and variational_families (st--, Apr 17, 2023)
c687cfe  Revert "gpjax.objectives: always import from gps and variational_fami… (st--, Apr 17, 2023)
96d3d1e  fix gpjax.objectives imported types (st--, Apr 17, 2023)
fc18652  <...> | None not supported by beartype; replaced by Optional[<...>] (st--, Apr 17, 2023)
4cbe562  gpjax.datasets: cannot specify strict array shape AND rely on _check_… (st--, Apr 17, 2023)
16006ac  our tfd.Distribution subclassing requires the fix introduced in jaxty… (st--, Apr 17, 2023)
c95badb  need to import base first! (st--, Apr 17, 2023)
3c8969d  bugfix (st--, Apr 17, 2023)
243e22f  AbstractKernel: string for forward references (st--, Apr 17, 2023)
0c3ae8a  remove from __future__ import annotations (st--, Apr 19, 2023)
cd6cf66  fix type annotations to make up for changes in 0c3ae8ac33e7938a2f4d5b… (st--, Apr 19, 2023)
c31885e  pytree map functions may take a non-Module argument (st--, Apr 21, 2023)
7083d08  ScalarFloat (st--, Apr 21, 2023)
692d337  VecNOrMatNM (st--, Apr 21, 2023)
1b4ae52  remove unnecessary / buggy methods (st--, Apr 21, 2023)
7286cd0  more ScalarFloat (st--, Apr 21, 2023)
6e55323  ScalarFloat (st--, Apr 21, 2023)
8159a2d  type fixes (st--, Apr 21, 2023)
62c7a69  fix shape type (st--, Apr 21, 2023)
3a8814a  fix one KeyArray (st--, Apr 24, 2023)
25ed38f  more ScalarFloat corrections in kernels (st--, Apr 24, 2023)
76dd035  fix test_stationary accordingly for ScalarFloat params (st--, Apr 24, 2023)
2ba27b2  fix return type (st--, Apr 24, 2023)
33a3e7b  ScalarInt for Polynomial kernel and fix test for Scalar* params (st--, Apr 24, 2023)
4dd9ed5  fix mock in test_abstract_variational_family (st--, Apr 24, 2023)
54ef19c  fix link_function and variational_expectations shape annotations (st--, Apr 24, 2023)
3a21676  minor test fix (st--, Apr 24, 2023)
551f055  fix exception test for beartype (st--, Apr 24, 2023)
6fc3a55  fix Constant mean function (st--, Apr 24, 2023)
bf2a483  base_kernel as kwarg in test_approximations (st--, Apr 24, 2023)
ac70773  rename func to test_ so it actually gets collected (st--, Apr 24, 2023)
e2b6a88  mark test_graph_kernel as broken (st--, Apr 24, 2023)
985d554  fix LinearOperator DTypeT (st--, Apr 24, 2023)
7ea37d2  Revert "fix Constant mean function" (st--, Apr 24, 2023)
760d470  fix test_mean_functions instead (st--, Apr 24, 2023)
fcf6572  fix one more bug in RFF test (st--, Apr 24, 2023)
78397d5  Self (st--, Apr 24, 2023)
67f4c94  relax fit objective type (st--, Apr 24, 2023)
68d6371  rename gpjax.utils -> gpjax.typing (st--, Apr 25, 2023)
65a9bfb  Kernel = Any -> string forward reference (st--, Apr 25, 2023)
c439a93  relax Gaussian.predict type annotation to include GaussianDistribution (st--, Apr 25, 2023)
edaf2cb  our own `Array` type that accepts both JAX and Numpy arrays (st--, Apr 25, 2023)
bb4d51f  some Float -> Num relaxations for graph kernel... (st--, Apr 25, 2023)
ea1537d  ScalarFloat for GraphKernel hyperparams (st--, Apr 25, 2023)
495c649  fix type hints to what happens (even if it seems wrong) (st--, Apr 25, 2023)
dca6799  type relaxation for deep_kernels.pct.py (st--, Apr 25, 2023)
f26d52e  some more minor consistency fixes (st--, Apr 25, 2023)
23efd03  bugfix (st--, Apr 25, 2023)
1077efa  Update examples/graph_kernels.pct.py (thomaspinder, Apr 25, 2023)
c292c7e  Update gpjax/dataset.py (thomaspinder, Apr 25, 2023)
7e16487  jaxtyping import hook for notebooks (st--, Apr 26, 2023)
6620802  conftest.py to apply jaxtyping import hook before loading tests (st--, Apr 26, 2023)
bc70fdf  remove import hook from gpjax/__init__ (st--, Apr 26, 2023)
249c9d0  Merge branch 'v0.6' of https://github.com/JaxGaussianProcesses/GPJax … (st--, Apr 26, 2023)
c4a8af8  Update gpjax/dataset.py (st--, Apr 26, 2023)
62abd1a  Update gpjax/dataset.py (thomaspinder, Apr 26, 2023)
42e21fb  fix tests of shape checks now that we have beartype (st--, Apr 26, 2023)
Files changed
28 changes: 15 additions & 13 deletions gpjax/__init__.py
@@ -13,19 +13,21 @@
# limitations under the License.
# ==============================================================================

from .dataset import Dataset
from .fit import fit
from .gps import Prior, construct_posterior
from .kernels import *
from .likelihoods import Bernoulli, Gaussian
from .mean_functions import Constant, Zero
from .objectives import (ELBO, CollapsedELBO, ConjugateMLL,
LogPosteriorDensity, NonConjugateMLL)
from .variational_families import (CollapsedVariationalGaussian,
ExpectationVariationalGaussian,
NaturalVariationalGaussian,
VariationalGaussian,
WhitenedVariationalGaussian)
from jaxtyping import install_import_hook
with install_import_hook("gpjax", "beartype.beartype"):
Contributor Author:

do we want to push beartype onto end users?

Collaborator:

I don't understand well enough to comment here. What are the pros/cons of this?

Contributor Author:

hm, mainly in some places we might have specified stricter types (array dtypes/shapes) than is strictly required by the code, so some code might have run fine if only beartype hadn't intervened. also, the explicit _check_shape error messages might be a bit more informative than the generic beartype ones. I don't know if there's also some more interaction with the jaxtyping @jaxtyped decorator.

Collaborator:

So if we didn't push beartype onto the end-user, then it would just be a testing utility for the package? I could easily imagine how any overly rigid beartype assertions could be annoying for an end user.

Contributor Author:

Yeah! Though on the other hand, it could help us discover more bugs not handled by the tests...

Contributor Author:

For example, a bunch of them were only uncovered by the notebooks. But I guess we could add beartype there as well, and then use that as a way to suggest to users that they should use e.g. beartype (and report when one of our type hints is wrong)?

Collaborator:

That's a good way to position it. Let's push it onto users then. If it becomes an issue, we can always walk it back with little major disruption.

Contributor Author:

Umm, do you mean force it on users (by including it in the general import), or strongly suggest it to users by having it at the start of every notebook (and inside the tests)? I think I'd be in favour of the latter actually...

Collaborator:

The latter - it's consistent with how we encourage people to use float64 without enforcing it.

Contributor Author:

done

from .dataset import Dataset
from .fit import fit
from .gps import Prior, construct_posterior
from .kernels import *
from .likelihoods import Bernoulli, Gaussian
from .mean_functions import Constant, Zero
from .objectives import (ELBO, CollapsedELBO, ConjugateMLL,
LogPosteriorDensity, NonConjugateMLL)
from .variational_families import (CollapsedVariationalGaussian,
ExpectationVariationalGaussian,
NaturalVariationalGaussian,
VariationalGaussian,
WhitenedVariationalGaussian)

__license__ = "MIT"
__description__ = "Didactic Gaussian processes in JAX"
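Following the resolution above (suggest rather than enforce), a user who wants runtime type and shape checking can install the hook themselves before importing the package, using the same hook call shown in this diff. A minimal opt-in sketch; the Dataset call at the end is illustrative and not taken from the PR:

from jaxtyping import install_import_hook

# Wrap every gpjax module with the jaxtyping/beartype decorators at import time.
with install_import_hook("gpjax", "beartype.beartype"):
    import gpjax as gpx

import jax.numpy as jnp

# With the hook active, annotated array shapes and dtypes are verified at runtime.
D = gpx.Dataset(X=jnp.ones((10, 2)), y=jnp.ones((10, 1)))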
43 changes: 22 additions & 21 deletions gpjax/base/module.py
@@ -13,14 +13,13 @@
# limitations under the License.
# ==============================================================================

from __future__ import annotations

__all__ = ["Module", "meta_leaves", "meta_flatten", "meta_map", "meta"]

import dataclasses
import os
from copy import copy, deepcopy
from typing import Any, Callable, Dict, Iterable, List, Tuple
from beartype.typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar, Union

import jax
import jax.tree_util as jtu
@@ -31,7 +30,9 @@
PyTreeCheckpointer, PyTreeCheckpointHandler,
RestoreArgs, SaveArgs)
from simple_pytree import Pytree, static_field
from typing_extensions import Self
thomaspinder marked this conversation as resolved.


Self = TypeVar('T')


class Module(Pytree):
@@ -49,7 +50,7 @@ def __init_subclass__(cls, mutable: bool = False):
):
cls._pytree__meta[field] = {**value.metadata}

def replace(self, **kwargs: Any) -> Self:
def replace(self: Self, **kwargs: Any) -> Self:
"""
Replace the values of the fields of the object.

@@ -68,7 +69,7 @@ def replace(self, **kwargs: Any) -> Self:
pytree.__dict__.update(kwargs)
return pytree

def replace_meta(self, **kwargs: Any) -> Self:
def replace_meta(self: Self, **kwargs: Any) -> Self:
"""
Replace the metadata of the fields.

@@ -87,7 +88,7 @@ def replace_meta(self, **kwargs: Any) -> Self:
pytree.__dict__.update(_pytree__meta={**pytree._pytree__meta, **kwargs})
return pytree

def update_meta(self, **kwargs: Any) -> Self:
def update_meta(self: Self, **kwargs: Any) -> Self:
"""
Update the metadata of the fields. The metadata must already exist.

@@ -112,15 +113,15 @@ def update_meta(self, **kwargs: Any) -> Self:
pytree.__dict__.update(_pytree__meta=new)
return pytree

def replace_trainable(self: Module, **kwargs: Dict[str, bool]) -> Self:
def replace_trainable(self: Self, **kwargs: Dict[str, bool]) -> Self:
"""Replace the trainability status of local nodes of the Module."""
return self.update_meta(**{k: {"trainable": v} for k, v in kwargs.items()})

def replace_bijector(self: Module, **kwargs: Dict[str, tfb.Bijector]) -> Self:
def replace_bijector(self: Self, **kwargs: Dict[str, tfb.Bijector]) -> Self:
"""Replace the bijectors of local nodes of the Module."""
return self.update_meta(**{k: {"bijector": v} for k, v in kwargs.items()})

def constrain(self) -> Self:
def constrain(self: Self) -> Self:
"""Transform model parameters to the constrained space according to their defined bijectors.

Returns:
@@ -137,7 +138,7 @@ def _apply_constrain(meta_leaf):

return meta_map(_apply_constrain, self)

def unconstrain(self) -> Self:
def unconstrain(self: Self) -> Self:
"""Transform model parameters to the unconstrained space according to their defined bijectors.

Returns:
@@ -154,7 +155,7 @@ def _apply_unconstrain(meta_leaf):

return meta_map(_apply_unconstrain, self)

def stop_gradient(self) -> Self:
def stop_gradient(self: Self) -> Self:
"""Stop gradients flowing through the Module.

Returns:
@@ -176,7 +177,7 @@ def _apply_stop_grad(meta_leaf):
return meta_map(_apply_stop_grad, self)


def _toplevel_meta(pytree: Any) -> List[Dict[str, Any]]:
def _toplevel_meta(pytree: Any) -> List[Optional[Dict[str, Any]]]:
"""Unpacks a list of meta corresponding to the top-level nodes of the pytree.

Args:
@@ -197,7 +198,7 @@ def meta_leaves(
def meta_leaves(
pytree: Module,
*,
is_leaf: Callable[[Any], bool] | None = None,
is_leaf: Optional[Callable[[Any], bool]] = None,
) -> List[Tuple[Dict[str, Any], Any]]:
"""
Returns the meta of the leaves of the pytree.
@@ -212,8 +213,8 @@

def _unpack_metadata(
meta_leaf: Any,
pytree: Module,
is_leaf: Callable[[Any], bool] | None,
pytree: Union[Module, Any],
is_leaf: Optional[Callable[[Any], bool]],
):
"""Recursively unpack leaf metadata."""
if is_leaf and is_leaf(pytree):
@@ -235,8 +236,8 @@ def _unpack_metadata(


def meta_flatten(
pytree: Module, *, is_leaf: Callable[[Any], bool] | None = None
) -> Module:
pytree: Union[Module, Any], *, is_leaf: Optional[Callable[[Any], bool]] = None
) -> Union[Module, Any]:
"""
Returns the meta of the Module.

@@ -254,10 +255,10 @@ def meta_flatten(

def meta_map(
f: Callable[[Any, Dict[str, Any]], Any],
pytree: Module,
pytree: Union[Module, Any],
*rest: Any,
is_leaf: Callable[[Any], bool] | None = None,
) -> Module:
is_leaf: Optional[Callable[[Any], bool]] = None,
) -> Union[Module, Any]:
"""Apply a function to a Module where the first argument are the pytree leaves, and the second argument are the Module metadata leaves.
Args:
f (Callable[[Any, Dict[str, Any]], Any]): The function to apply to the pytree.
@@ -273,7 +274,7 @@ def meta_map(
return treedef.unflatten(f(*xs) for xs in zip(*all_leaves))


def meta(pytree: Module, *, is_leaf: Callable[[Any], bool] | None = None) -> Module:
def meta(pytree: Module, *, is_leaf: Optional[Callable[[Any], bool]] = None) -> Module:
"""Returns the metadata of the Module as a pytree.

Args:
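The recurring change in this file is replacing typing_extensions.Self (plus the `from __future__ import annotations` forward references and `X | None` unions, which the commit messages note beartype could not handle) with a module-level TypeVar and Optional[...]. A short sketch of the TypeVar-based pattern in isolation, using a hypothetical class rather than gpjax code:

from copy import copy
from beartype.typing import Any, TypeVar

SelfT = TypeVar("SelfT")  # stands in for typing_extensions.Self

class Node:
    def replace(self: SelfT, **kwargs: Any) -> SelfT:
        # Annotating `self` with the TypeVar ties the return type to the
        # concrete subclass, so Leaf().replace(...) is typed as Leaf, not Node.
        new = copy(self)
        new.__dict__.update(kwargs)
        return new

class Leaf(Node):
    value: int = 0

leaf: Leaf = Leaf().replace(value=1)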
3 changes: 1 addition & 2 deletions gpjax/base/param.py
@@ -13,12 +13,11 @@
# limitations under the License.
# ==============================================================================

from __future__ import annotations
thomaspinder marked this conversation as resolved.

__all__ = ["param_field"]

import dataclasses
from typing import Any, Mapping, Optional
from beartype.typing import Any, Mapping, Optional

import tensorflow_probability.substrates.jax.bijectors as tfb

11 changes: 5 additions & 6 deletions gpjax/dataset.py
@@ -12,10 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import annotations

from dataclasses import dataclass
from typing import Optional
from beartype.typing import Optional, Union

import jax.numpy as jnp
from jaxtyping import Array, Float
@@ -31,8 +30,8 @@ class Dataset(Pytree):
y (Optional[Float[Array, "N Q"]]): Output data.
"""

X: Optional[Float[Array, "N D"]] = None
y: Optional[Float[Array, "N Q"]] = None
X: Optional[Union[Float[Array, "N D"], Float[Array, "..."]]] = None
y: Optional[Union[Float[Array, "N Q"], Float[Array, "..."]]] = None
thomaspinder marked this conversation as resolved.

def __post_init__(self) -> None:
"""Checks that the shapes of X and y are compatible."""
@@ -54,7 +53,7 @@ def is_unsupervised(self) -> bool:
"""Returns `True` if the dataset is unsupervised."""
return self.X is None and self.y is not None

def __add__(self, other: Dataset) -> Dataset:
def __add__(self, other: "Dataset") -> "Dataset":
"""Combine two datasets. Right hand dataset is stacked beneath the left."""

X = None
@@ -84,7 +83,7 @@ def out_dim(self) -> int:
return self.y.shape[1]


def _check_shape(X: Float[Array, "N D"], y: Float[Array, "N Q"]) -> None:
def _check_shape(X: Optional[Float[Array, "..."]], y: Optional[Float[Array, "..."]]) -> None:
Contributor Author:

Here I removed the beartype-shape checking. Could instead simply remove the _check_shape function...

Collaborator:

Is there any reason not to remove _check_shape now we use beartype? If not, then let's do it.

Contributor Author:

Actually, beartype only checks arguments individually, and does not check consistency of dimensions across multiple arguments/return values, so might be better to keep the _check_shape (so e.g. "N1 D" and "N2 D" shapes get flagged as not-matching).

Contributor Author:

Actually actually, jaxtyping itself checks those, I just had a bug in my toy example trying it out. But the error messages are still more verbose/less precise than the ones emitted by _check_shape 😞

Collaborator:

OK. I'm guessing there's no easy way for us to customise the error messages that Beartype throws?

Collaborator:

If the answer to my above question is no, then let's keep _check_shape.

Collaborator (@thomaspinder, Apr 25, 2023):

It looks like this is a very hot topic right now: patrick-kidger/jaxtyping#6.

"""Checks that the shapes of X and y are compatible."""
if X is not None and y is not None:
if X.shape[0] != y.shape[0]:
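On the thread above: beartype alone validates each argument against its own annotation, while the jaxtyping decorator additionally binds dimension names ("N", "D", "Q") within a call and checks that they agree across arguments, just with a less targeted error message than _check_shape. A rough illustration with a throwaway function (not part of the diff), using the @jaxtyped/@beartype stacking that this PR's import hook applies:

import jax.numpy as jnp
from beartype import beartype
from jaxtyping import Array, Float, jaxtyped

@jaxtyped   # shares dimension bindings across arguments within one call
@beartype   # performs the per-argument runtime checks
def pair(X: Float[Array, "N D"], y: Float[Array, "N Q"]) -> None:
    pass

pair(jnp.ones((10, 2)), jnp.ones((10, 1)))  # fine: "N" is 10 for both arguments
pair(jnp.ones((10, 2)), jnp.ones((5, 1)))   # raises: "N" bound to 10 by X, but y has 5 rows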
6 changes: 3 additions & 3 deletions gpjax/fit.py
@@ -13,17 +13,17 @@
# limitations under the License.
# ==============================================================================

from typing import Any, Optional, Tuple
from beartype.typing import Any, Optional, Tuple

import jax
import jax.random as jr
import optax as ox
from jax._src.random import _check_prng_key
from jax.random import KeyArray
from jaxtyping import Array, Float
from jaxlib.xla_extension import PjitFunction
from warnings import warn

from gpjax.utils import ScalarFloat, KeyArray
from .base import Module
from .dataset import Dataset
from .objectives import AbstractObjective
@@ -117,7 +117,7 @@ def fit(
_check_verbose(verbose)

# Unconstrained space loss function with stop-gradient rule for non-trainable params.
def loss(model: Module, batch: Dataset) -> Float[Array, "1"]:
def loss(model: Module, batch: Dataset) -> ScalarFloat:
model = model.stop_gradient()
return objective(model.constrain(), batch)

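The hunks above pull ScalarFloat and KeyArray from gpjax.utils (renamed to gpjax.typing later in the PR, commit 68d6371) instead of annotating the loss with Float[Array, "1"]. The alias definitions are not shown in this view; as a rough, hypothetical reconstruction from the commit messages, a scalar alias is a zero-dimensional jaxtyping array type along these lines:

# Hypothetical sketch of the Scalar* aliases referenced above; the actual
# definitions in gpjax.typing may differ (e.g. by also admitting Python scalars).
from jaxtyping import Array, Bool, Float, Int

ScalarFloat = Float[Array, ""]   # zero-dimensional float array, e.g. jnp.array(1.0)
ScalarInt = Int[Array, ""]
ScalarBool = Bool[Array, ""]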
32 changes: 17 additions & 15 deletions gpjax/gaussian_distribution.py
@@ -13,12 +13,14 @@
# limitations under the License.
# ==============================================================================

from typing import Any, Optional, Tuple

from beartype.typing import Any, Optional, Tuple

import jax.numpy as jnp
import jax.random as jr
from gpjax.utils import KeyArray
from gpjax.utils import ScalarFloat
from jax import vmap
from jax.random import KeyArray
from jaxtyping import Array, Float
import tensorflow_probability.substrates.jax as tfp

@@ -132,20 +134,20 @@ def event_shape(self) -> Tuple:
"""Returns the event shape."""
return self.loc.shape[-1:]

def entropy(self) -> Float[Array, "1"]:
def entropy(self) -> ScalarFloat:
"""Calculates the entropy of the distribution."""
return 0.5 * (
self.event_shape[0] * (1.0 + jnp.log(2.0 * jnp.pi)) + self.scale.log_det()
)

def log_prob(self, y: Float[Array, "N"]) -> Float[Array, "1"]:
def log_prob(self, y: Float[Array, "N"]) -> ScalarFloat:
"""Calculates the log pdf of the multivariate Gaussian.

Args:
y (Float[Array, "N"]): The value to calculate the log probability of.

Returns:
Float[Array, "1"]: The log probability of the value.
ScalarFloat: The log probability of the value.
"""
mu = self.loc
sigma = self.scale
@@ -179,11 +181,11 @@ def _sample_n(self, key: KeyArray, n: int) -> Float[Array, "n N"]:

return vmap(affine_transformation)(Z)

def sample(self,seed: KeyArray, sample_shape: Tuple[int, int]): # pylint: disable=useless-super-delegation
"""See `Distribution.sample`."""
return self._sample_n(seed, sample_shape[0])
def sample(self, seed: KeyArray, sample_shape: Tuple[int, ...]): # pylint: disable=useless-super-delegation
"""See `Distribution.sample`."""
return self._sample_n(seed, sample_shape[0]) # TODO this looks weird, why ignore the second entry?
st-- marked this conversation as resolved.

def kl_divergence(self, other: "GaussianDistribution") -> Float[Array, "1"]:
def kl_divergence(self, other: "GaussianDistribution") -> ScalarFloat:
return _kl_divergence(self, other)


@@ -200,14 +202,14 @@ def _check_and_return_dimension(
return q.event_shape[-1]


def _frobeinius_norm_squared(matrix: Float[Array, "N N"]) -> Float[Array, "1"]:
def _frobenius_norm_squared(matrix: Float[Array, "N N"]) -> ScalarFloat:
"""Calculates the squared Frobenius norm of a matrix."""
return jnp.sum(jnp.square(matrix))


def _kl_divergence(
q: GaussianDistribution, p: GaussianDistribution
) -> Float[Array, "1"]:
) -> ScalarFloat:
"""Computes the KL divergence, KL[q||p], between two multivariate Gaussian distributions
q(x) = N(x; μq, Σq) and p(x) = N(x; μp, Σp).

@@ -216,7 +218,7 @@ def _kl_divergence(
p (GaussianDistribution): A multivariate Gaussian distribution.

Returns:
Float[Array, "1"]: The KL divergence between q and p.
ScalarFloat: The KL divergence between q and p.
"""

n_dim = _check_and_return_dimension(q, p)
@@ -237,14 +239,14 @@ def _kl_divergence(
diff = mu_p - mu_q

# trace term, tr[Σp⁻¹ Σq] = tr[(LpLpᵀ)⁻¹(LqLqᵀ)] = tr[(Lp⁻¹Lq)(Lp⁻¹Lq)ᵀ] = (fr[LqLp⁻¹])²
trace = _frobeinius_norm_squared(
trace = _frobenius_norm_squared(
sqrt_p.solve(sqrt_q.to_dense())
) # TODO: Not most efficient, given the `to_dense()` call (e.g., consider diagonal p and q). Need to abstract solving linear operator against another linear operator.

# Mahalanobis term, (μp - μq)ᵀ Σp⁻¹ (μp - μq) = tr [(μp - μq)ᵀ [LpLpᵀ]⁻¹ (μp - μq)] = (fr[Lp⁻¹(μp - μq)])²
mahalanobis = _frobeinius_norm_squared(
mahalanobis = jnp.sum(jnp.square(
sqrt_p.solve(diff)
) # TODO: Need to improve this. Perhaps add a Mahalanobis method to ``LinearOperator``s.
)) # TODO: Need to improve this. Perhaps add a Mahalanobis method to ``LinearOperator``s.

# KL[q(x)||p(x)] = [ [(μp - μq)ᵀ Σp⁻¹ (μp - μq)] - n - log|Σq| + log|Σp| + tr[Σp⁻¹ Σq] ] / 2
return (mahalanobis - n_dim - sigma_q.log_det() + sigma_p.log_det() + trace) / 2.0
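The trace/Mahalanobis decomposition in _kl_divergence above works on the Cholesky-style square roots of both covariances. A standalone dense-array sketch of the same computation, which can be useful for sanity-checking the linear-operator version (assumed helper, not gpjax code):

import jax.numpy as jnp

def kl_mvn(mu_q, L_q, mu_p, L_p):
    # KL[N(μq, Lq Lqᵀ) || N(μp, Lp Lpᵀ)] from lower-triangular Cholesky factors.
    n = mu_q.shape[0]
    diff = mu_p - mu_q
    # tr[Σp⁻¹ Σq] = ||Lp⁻¹ Lq||_F²
    trace = jnp.sum(jnp.square(jnp.linalg.solve(L_p, L_q)))
    # (μp - μq)ᵀ Σp⁻¹ (μp - μq) = ||Lp⁻¹ (μp - μq)||²
    mahalanobis = jnp.sum(jnp.square(jnp.linalg.solve(L_p, diff)))
    # log-determinants from the Cholesky diagonals
    log_det_q = 2.0 * jnp.sum(jnp.log(jnp.diag(L_q)))
    log_det_p = 2.0 * jnp.sum(jnp.log(jnp.diag(L_p)))
    return 0.5 * (mahalanobis - n - log_det_q + log_det_p + trace)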