From a24f81da8087e545891bebc9345e65a8dfb1024d Mon Sep 17 00:00:00 2001
From: Ryan Williams <ryan@runsascoded.com>
Date: Tue, 10 Sep 2024 16:02:37 +0200
Subject: [PATCH] Revert "[python] Follow spec-0 (#1189)"

This reverts commit fc7aefe743cce95eb406b7e33b592018d05418b8.
---
 .github/workflows/py-dependency-check.yml     |  4 +-
 .github/workflows/py-unittests.yml            |  4 +-
 api/python/cellxgene_census/pyproject.toml    | 16 ++---
 .../src/cellxgene_census/_get_anndata.py      | 49 +++++++------
 .../src/cellxgene_census/_open.py             | 18 ++---
 .../cellxgene_census/_release_directory.py    | 37 +++++-----
 .../experimental/_embedding.py                |  8 +--
 .../experimental/_embedding_search.py         | 17 +++--
 .../experimental/ml/encoders.py               |  9 +--
 .../ml/huggingface/cell_dataset_builder.py    | 13 ++--
 .../ml/huggingface/geneformer_tokenizer.py    | 21 +++---
 .../experimental/ml/pytorch.py                | 71 +++++++++----------
 .../experimental/pp/_highly_variable_genes.py |  4 +-
 .../experimental/pp/_online.py                | 12 ++--
 .../experimental/pp/_stats.py                 |  3 +-
 .../experimental/util/_eager_iter.py          | 11 ++-
 .../tests/experimental/ml/test_pytorch.py     | 40 ++++++-----
 .../tests/experimental/pp/test_stats.py       |  8 +--
 .../experimental/test_embeddings_search.py    |  8 +--
 .../cellxgene_census/tests/test_acceptance.py | 25 ++++---
 .../tests/test_get_anndata.py                 | 20 +++---
 .../cellxgene_census/tests/test_lts_compat.py | 14 ++--
 .../cellxgene_census/tests/test_open.py       |  4 +-
 .../cellxgene_census/tests/test_user_agent.py |  3 +-
 api/python/notebooks/README.md                |  2 +-
 docs/cellxgene_census_docsite_installation.md |  2 +-
 26 files changed, 213 insertions(+), 210 deletions(-)

diff --git a/.github/workflows/py-dependency-check.yml b/.github/workflows/py-dependency-check.yml
index 010409042..3afc6a06a 100644
--- a/.github/workflows/py-dependency-check.yml
+++ b/.github/workflows/py-dependency-check.yml
@@ -25,10 +25,10 @@ jobs:
       fail-fast: false  # don't fail-fast, as errors are often specific to a single cell in the matrix
       matrix:
         os: [sc-dev-64g-runner, macos-latest]
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         exclude:
           - os: macos-latest
-            python-version: "3.12"
+            python-version: "3.8"
 
     runs-on: ${{matrix.os}}
 
diff --git a/.github/workflows/py-unittests.yml b/.github/workflows/py-unittests.yml
index e5ba6f0eb..f224f4560 100644
--- a/.github/workflows/py-unittests.yml
+++ b/.github/workflows/py-unittests.yml
@@ -21,10 +21,10 @@ jobs:
       fail-fast: false  # Don't stop the workflow if one of the jobs fails
       matrix:
         os: [sc-dev-64g-runner, macos-latest]
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         exclude:
           - os: macos-latest
-            python-version: "3.12"
+            python-version: "3.8"
 
     runs-on: ${{matrix.os}}
 
diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml
index e9aa1979d..f050bef75 100644
--- a/api/python/cellxgene_census/pyproject.toml
+++ b/api/python/cellxgene_census/pyproject.toml
@@ -11,7 +11,7 @@ authors = [
 ]
 license = { text = "MIT" }
 readme = "README.md"
-requires-python = ">= 3.10, < 3.13"
+requires-python = ">= 3.8, < 3.12"
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
@@ -22,17 +22,18 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Bio-Informatics",
     "Operating System :: POSIX :: Linux",
     "Operating System :: MacOS :: MacOS X",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
 ]
 dependencies= [
     # NOTE: the tiledbsoma version must be >= to the version used in the Census builder, to
     # ensure that the assets are readable (tiledbsoma supports backward compatible reading).
     # Make sure this version does not fall behind the builder's tiledbsoma version.
-    "tiledbsoma>=1.12.3",
+    "tiledbsoma~=1.12.3",
     "anndata",
-    "numpy>=1.23,<2.0",
+    "numpy>=1.21,<2.0",
     "requests",
     "typing_extensions",
     "s3fs>=2021.06.1",
@@ -42,8 +43,9 @@ dependencies= [
 experimental = [
     "torch",
     "torchdata~=0.7",
-    "scikit-learn>=1.2",
+    "scikit-learn~=1.0",
     "scikit-misc>=0.2,<0.4",  # scikit-misc 0.3 dropped Python 3.8 support, and 0.4 doesn't have MacOS/ARM wheels
+    "psutil~=5.0",
     "datasets~=2.0",
     "tdigest~=0.5",
     # choose newest version of tiledb-vector-search that doesn't need a newer version of tiledb
@@ -79,7 +81,7 @@ root = "../../.."
 [tool.ruff]
 line-length = 120
 src = ["api/python/cellxgene_census/src"]
-target-version = "py310"
+target-version = "py38"
 
 [tool.ruff.lint]
 select = [
@@ -127,8 +129,6 @@ ignore = [
     "D205",
     # Prefer absolute imports over relative imports from parent modules TODO: enable
     "TID252",
-    # It's okay to use zip without the strict kwarg. In fact, numba doesn't like it when you use it
-    "B905",
 ]
 
 [tool.ruff.lint.pydocstyle]
diff --git a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
index 9d7a5c41b..e37337184 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
@@ -7,8 +7,7 @@
 Methods to retrieve slices of the census as AnnData objects.
 """
 
-from collections.abc import Sequence
-from typing import Literal
+from typing import Literal, Optional, Sequence
 from warnings import warn
 
 import anndata
@@ -28,20 +27,20 @@ def get_anndata(
     organism: str,
     measurement_name: str = "RNA",
     X_name: str = "raw",
-    X_layers: Sequence[str] | None = (),
-    obsm_layers: Sequence[str] | None = (),
-    obsp_layers: Sequence[str] | None = (),
-    varm_layers: Sequence[str] | None = (),
-    varp_layers: Sequence[str] | None = (),
-    obs_value_filter: str | None = None,
-    obs_coords: SparseDFCoord | None = None,
-    var_value_filter: str | None = None,
-    var_coords: SparseDFCoord | None = None,
-    column_names: soma.AxisColumnNames | None = None,
-    obs_embeddings: Sequence[str] | None = (),
-    var_embeddings: Sequence[str] | None = (),
-    obs_column_names: Sequence[str] | None = None,
-    var_column_names: Sequence[str] | None = None,
+    X_layers: Optional[Sequence[str]] = (),
+    obsm_layers: Optional[Sequence[str]] = (),
+    obsp_layers: Optional[Sequence[str]] = (),
+    varm_layers: Optional[Sequence[str]] = (),
+    varp_layers: Optional[Sequence[str]] = (),
+    obs_value_filter: Optional[str] = None,
+    obs_coords: Optional[SparseDFCoord] = None,
+    var_value_filter: Optional[str] = None,
+    var_coords: Optional[SparseDFCoord] = None,
+    column_names: Optional[soma.AxisColumnNames] = None,
+    obs_embeddings: Optional[Sequence[str]] = (),
+    var_embeddings: Optional[Sequence[str]] = (),
+    obs_column_names: Optional[Sequence[str]] = None,
+    var_column_names: Optional[Sequence[str]] = None,
 ) -> anndata.AnnData:
     """Convenience wrapper around :class:`tiledbsoma.Experiment` query, to build and execute a query,
     and return it as an :class:`anndata.AnnData` object.
@@ -177,9 +176,9 @@ def _get_axis_metadata(
     axis: Literal["obs", "var"],
     organism: str,
     *,
-    value_filter: str | None = None,
-    coords: SparseDFCoord | None = slice(None),
-    column_names: Sequence[str] | None = None,
+    value_filter: Optional[str] = None,
+    coords: Optional[SparseDFCoord] = slice(None),
+    column_names: Optional[Sequence[str]] = None,
 ) -> pd.DataFrame:
     exp = _get_experiment(census, organism)
     coords = (slice(None),) if coords is None else (coords,)
@@ -199,9 +198,9 @@ def get_obs(
     census: soma.Collection,
     organism: str,
     *,
-    value_filter: str | None = None,
-    coords: SparseDFCoord | None = slice(None),
-    column_names: Sequence[str] | None = None,
+    value_filter: Optional[str] = None,
+    coords: Optional[SparseDFCoord] = slice(None),
+    column_names: Optional[Sequence[str]] = None,
 ) -> pd.DataFrame:
     """Get the observation metadata for a query on the census.
 
@@ -231,9 +230,9 @@ def get_var(
     census: soma.Collection,
     organism: str,
     *,
-    value_filter: str | None = None,
-    coords: SparseDFCoord | None = slice(None),
-    column_names: Sequence[str] | None = None,
+    value_filter: Optional[str] = None,
+    coords: Optional[SparseDFCoord] = slice(None),
+    column_names: Optional[Sequence[str]] = None,
 ) -> pd.DataFrame:
     """Get the variable metadata for a query on the census.
 
diff --git a/api/python/cellxgene_census/src/cellxgene_census/_open.py b/api/python/cellxgene_census/src/cellxgene_census/_open.py
index 5af0fea0b..642e6fbb6 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/_open.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/_open.py
@@ -10,7 +10,7 @@
 import logging
 import os.path
 import urllib.parse
-from typing import Any, get_args
+from typing import Any, Dict, Optional, get_args
 
 import s3fs
 import tiledbsoma as soma
@@ -32,7 +32,7 @@
     "anon": True,
     "cache_regions": True,
 }
-DEFAULT_TILEDB_CONFIGURATION: dict[str, Any] = {
+DEFAULT_TILEDB_CONFIGURATION: Dict[str, Any] = {
     # https://docs.tiledb.com/main/how-to/configuration#configuration-parameters
     "py.init_buffer_bytes": 1 * 1024**3,
     "soma.init_buffer_bytes": 1 * 1024**3,
@@ -71,7 +71,7 @@ def _resolve_census_locator(locator: CensusLocator, mirror: CensusMirror) -> Res
 
 def _open_soma(
     locator: ResolvedCensusLocator,
-    context: soma.options.SOMATileDBContext | None = None,
+    context: Optional[soma.options.SOMATileDBContext] = None,
 ) -> soma.Collection:
     """Private. Merge config defaults and return open census as a soma Collection/context."""
     # if no user-defined context, cellxgene_census defaults take precedence over SOMA defaults
@@ -85,7 +85,7 @@ def _open_soma(
     return soma.open(locator["uri"], mode="r", soma_type=soma.Collection, context=context)
 
 
-def get_default_soma_context(tiledb_config: dict[str, Any] | None = None) -> soma.options.SOMATileDBContext:
+def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) -> soma.options.SOMATileDBContext:
     """Return a :class:`tiledbsoma.SOMATileDBContext` with sensible defaults that can be further customized by the
     user. The customized context can then be passed to :func:`cellxgene_census.open_soma` with the ``context``
     argument or to :meth:`somacore.SOMAObject.open` with the ``context`` argument, such as
@@ -132,11 +132,11 @@ def get_default_soma_context(tiledb_config: dict[str, Any] | None = None) -> som
 
 def open_soma(
     *,
-    census_version: str | None = DEFAULT_CENSUS_VERSION,
-    mirror: str | None = None,
-    uri: str | None = None,
-    tiledb_config: dict[str, Any] | None = None,
-    context: soma.options.SOMATileDBContext | None = None,
+    census_version: Optional[str] = DEFAULT_CENSUS_VERSION,
+    mirror: Optional[str] = None,
+    uri: Optional[str] = None,
+    tiledb_config: Optional[Dict[str, Any]] = None,
+    context: Optional[soma.options.SOMATileDBContext] = None,
 ) -> soma.Collection:
     """Open the Census by version or URI.
 
diff --git a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py
index 177094e90..644a9df5b 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py
@@ -7,8 +7,9 @@
 Methods to retrieve information about versions of the publicly hosted Census object.
 """
 
+import typing
 from collections import OrderedDict
-from typing import Any, Literal, cast
+from typing import Any, Dict, Literal, Optional, Union, cast
 
 import requests
 from typing_extensions import NotRequired, TypedDict
@@ -36,7 +37,7 @@ class CensusLocator(TypedDict):
 
     uri: str
     relative_uri: str
-    s3_region: str | None
+    s3_region: Optional[str]
 
 
 class CensusVersionRetraction(TypedDict):
@@ -54,13 +55,13 @@ class CensusVersionRetraction(TypedDict):
     """
 
     date: str
-    reason: str | None
-    info_url: str | None
-    replaced_by: str | None
+    reason: Optional[str]
+    info_url: Optional[str]
+    replaced_by: Optional[str]
 
 
 ReleaseFlag = Literal["lts", "retracted"]
-ReleaseFlags = dict[ReleaseFlag, bool]
+ReleaseFlags = Dict[ReleaseFlag, bool]
 
 
 class CensusVersionDescription(TypedDict):
@@ -81,7 +82,7 @@ class CensusVersionDescription(TypedDict):
             If retracted, details of the retraction.
     """
 
-    release_date: str | None
+    release_date: Optional[str]
     release_build: str
     soma: CensusLocator
     h5ads: CensusLocator
@@ -89,7 +90,7 @@ class CensusVersionDescription(TypedDict):
     retraction: NotRequired[CensusVersionRetraction]
 
 
-CensusDirectory = dict[CensusVersionName, CensusVersionName | CensusVersionDescription]
+CensusDirectory = Dict[CensusVersionName, Union[CensusVersionName, CensusVersionDescription]]
 
 """
 A provider identifies a storage medium for the Census, which can either be a cloud provider or a local file.
@@ -131,11 +132,11 @@ class CensusMirror(TypedDict):
 
     provider: Provider
     base_uri: str
-    region: str | None
+    region: Optional[str]
     embeddings_base_uri: str
 
 
-CensusMirrors = dict[CensusMirrorName, CensusMirrorName | CensusMirror]
+CensusMirrors = Dict[CensusMirrorName, Union[CensusMirrorName, CensusMirror]]
 
 
 class ResolvedCensusLocator(TypedDict):
@@ -154,7 +155,7 @@ class ResolvedCensusLocator(TypedDict):
     """
 
     uri: str
-    region: str | None
+    region: Optional[str]
     provider: str
 
 
@@ -199,8 +200,8 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript
 
 
 def get_census_version_directory(
-    *, lts: bool | None = None, retracted: bool | None = False
-) -> dict[CensusVersionName, CensusVersionDescription]:
+    *, lts: Optional[bool] = None, retracted: Optional[bool] = False
+) -> Dict[CensusVersionName, CensusVersionDescription]:
     """Get the directory of Census versions currently available, optionally filtering by specified
     flags. If a filtering flag is not specified, Census versions will not be filtered by that flag.
     Defaults to including both "long-term stable" (LTS) and weekly Census versions, and excluding
@@ -355,9 +356,9 @@ def get_census_version_directory(
     response = requests.get(CELL_CENSUS_RELEASE_DIRECTORY_URL, headers={"User-Agent": _user_agent()})
     response.raise_for_status()
 
-    directory: dict[str, str | dict[str, Any]] = response.json()
+    directory: dict[str, Union[str, dict[str, Any]]] = response.json()
     directory_out: CensusDirectory = {}
-    aliases: set[CensusVersionName] = set()
+    aliases: typing.Set[CensusVersionName] = set()
 
     # Resolve all aliases for easier use
     for census_version_name in list(directory.keys()):
@@ -400,7 +401,7 @@ def get_census_version_directory(
         directory_out[census_version_name] = census_version_description.copy()
 
     # Cast is safe, as we have removed all aliases
-    unordered_directory = cast(dict[CensusVersionName, CensusVersionDescription], directory_out)
+    unordered_directory = cast(Dict[CensusVersionName, CensusVersionDescription], directory_out)
 
     # Sort by aliases and release date, descending
     aliased_releases = [(k, v) for k, v in unordered_directory.items() if k in aliases]
@@ -416,7 +417,7 @@ def get_census_version_directory(
     return ordered_directory
 
 
-def get_census_mirror_directory() -> dict[CensusMirrorName, CensusMirror]:
+def get_census_mirror_directory() -> Dict[CensusMirrorName, CensusMirror]:
     """Get the directory of Census mirrors currently available.
 
     Returns:
@@ -428,7 +429,7 @@ def get_census_mirror_directory() -> dict[CensusMirrorName, CensusMirror]:
     """
     mirrors = _get_census_mirrors()
     del mirrors["default"]
-    return cast(dict[CensusMirrorName, CensusMirror], mirrors)
+    return cast(Dict[CensusMirrorName, CensusMirror], mirrors)
 
 
 def _get_census_mirrors() -> CensusMirrors:
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
index 34d93ef42..4baba8e06 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
@@ -8,7 +8,7 @@
 
 import json
 import warnings
-from typing import Any, cast
+from typing import Any, Dict, cast
 
 import numpy as np
 import numpy.typing as npt
@@ -55,7 +55,7 @@ def get_embedding_metadata(embedding_uri: str, context: soma.options.SOMATileDBC
         embedding_metadata = json.loads(E.metadata["CxG_embedding_info"])
         assert isinstance(embedding_metadata, dict)
 
-    return cast(dict[str, Any], embedding_metadata)
+    return cast(Dict[str, Any], embedding_metadata)
 
 
 def _get_embedding(
@@ -67,7 +67,7 @@ def _get_embedding(
     context: soma.options.SOMATileDBContext | None = None,
 ) -> npt.NDArray[np.float32]:
     """Private. Like get_embedding, but accepts a Census object and a Census directory."""
-    if isinstance(obs_soma_joinids, pa.Array | pa.ChunkedArray | pd.Series):
+    if isinstance(obs_soma_joinids, (pa.Array, pa.ChunkedArray, pd.Series)):
         obs_soma_joinids = obs_soma_joinids.to_numpy()
     assert isinstance(obs_soma_joinids, np.ndarray)
     if obs_soma_joinids.dtype != np.int64:
@@ -194,7 +194,7 @@ def get_embedding_metadata_by_name(
     response = requests.get(CELL_CENSUS_EMBEDDINGS_MANIFEST_URL, headers={"User-Agent": _user_agent()})
     response.raise_for_status()
 
-    manifest = cast(dict[str, dict[str, Any]], response.json())
+    manifest = cast(Dict[str, Dict[str, Any]], response.json())
     embeddings = []
     for _, obj in manifest.items():
         if (
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding_search.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding_search.py
index de09e2060..179fa1d6d 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding_search.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding_search.py
@@ -1,8 +1,7 @@
 """Nearest-neighbor search based on vector index of Census embeddings."""
 
-from collections.abc import Sequence
 from contextlib import ExitStack
-from typing import Any, NamedTuple, cast
+from typing import Any, Dict, NamedTuple, Optional, Sequence, Tuple, cast
 
 import anndata as ad
 import numpy as np
@@ -44,9 +43,9 @@ def find_nearest_obs(
     k: int = 10,
     nprobe: int = 100,
     memory_GiB: int = 4,
-    mirror: str | None = None,
-    embedding_metadata: dict[str, Any] | None = None,
-    **kwargs: dict[str, Any],
+    mirror: Optional[str] = None,
+    embedding_metadata: Optional[Dict[str, Any]] = None,
+    **kwargs: Dict[str, Any],
 ) -> NeighborObs:
     """Search Census for similar obs (cells) based on nearest neighbors in embedding space.
 
@@ -98,9 +97,9 @@ def find_nearest_obs(
 
 
 def _resolve_embedding_index(
-    embedding_metadata: dict[str, Any],
-    mirror: str | None = None,
-) -> tuple[str, str] | None:
+    embedding_metadata: Dict[str, Any],
+    mirror: Optional[str] = None,
+) -> Optional[Tuple[str, str]]:
     index_metadata = embedding_metadata.get("indexes", None)
     if not index_metadata:
         return None
@@ -118,7 +117,7 @@ def predict_obs_metadata(
     census_version: str,
     neighbors: NeighborObs,
     column_names: Sequence[str],
-    experiment: soma.Experiment | None = None,
+    experiment: Optional[soma.Experiment] = None,
 ) -> pd.DataFrame:
     """Predict obs metadata attributes for the query cells based on the embedding nearest neighbors.
 
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/encoders.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/encoders.py
index 0be576ef6..3d4fc4dc5 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/encoders.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/encoders.py
@@ -1,5 +1,6 @@
 import abc
 import functools
+from typing import List
 
 import numpy.typing as npt
 import pandas as pd
@@ -46,7 +47,7 @@ def name(self) -> str:
 
     @property
     @abc.abstractmethod
-    def columns(self) -> list[str]:
+    def columns(self) -> List[str]:
         """Columns in ``obs`` that the encoder will be applied to."""
         pass
 
@@ -76,7 +77,7 @@ def name(self) -> str:
         return self.col
 
     @property
-    def columns(self) -> list[str]:
+    def columns(self) -> List[str]:
         """Columns in ``obs`` that the encoder will be applied to."""
         return [self.col]
 
@@ -89,7 +90,7 @@ def classes_(self):  # type: ignore
 class BatchEncoder(Encoder):
     """An encoder that concatenates and encodes several ``obs`` columns."""
 
-    def __init__(self, cols: list[str], name: str = "batch"):
+    def __init__(self, cols: List[str], name: str = "batch"):
         self.cols = cols
         from sklearn.preprocessing import LabelEncoder
 
@@ -114,7 +115,7 @@ def fit(self, obs: pd.DataFrame) -> None:
         self._encoder.fit(arr.unique())
 
     @property
-    def columns(self) -> list[str]:
+    def columns(self) -> List[str]:
         """Columns in ``obs`` that the encoder will be applied to."""
         return self.cols
 
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/cell_dataset_builder.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/cell_dataset_builder.py
index 5a9c2d626..07d2212c8 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/cell_dataset_builder.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/cell_dataset_builder.py
@@ -1,7 +1,6 @@
 import uuid
 from abc import ABC, abstractmethod
-from collections.abc import Generator
-from typing import Any
+from typing import Any, Dict, Generator, Optional
 
 import scipy.sparse
 from datasets import Dataset
@@ -38,7 +37,7 @@ def __init__(
         measurement_name: str = "RNA",
         layer_name: str = "raw",
         *,
-        block_size: int | None = None,
+        block_size: Optional[int] = None,
         **kwargs: Any,
     ):
         """Initialize the CellDatasetBuilder to process the results of a Census
@@ -56,13 +55,13 @@ def __init__(
         self.layer_name = layer_name
         self.block_size = block_size
 
-    def build(self, from_generator_kwargs: dict[str, Any] | None = None) -> Dataset:
+    def build(self, from_generator_kwargs: Optional[Dict[str, Any]] = None) -> Dataset:
         """Build the dataset from query results.
 
         - `from_generator_kwargs`: kwargs passed through to `Dataset.from_generator()`
         """
 
-        def gen() -> Generator[dict[str, Any], None, None]:
+        def gen() -> Generator[Dict[str, Any], None, None]:
             for Xblock, (block_cell_joinids, _) in (
                 self.X(self.layer_name).blockwise(axis=0, reindex_disable_on_axis=[1], size=self.block_size).scipy()
             ):
@@ -74,7 +73,7 @@ def gen() -> Generator[dict[str, Any], None, None]:
         return Dataset.from_generator(_DatasetGeneratorPickleHack(gen), **(from_generator_kwargs or {}))
 
     @abstractmethod
-    def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> dict[str, Any]:
+    def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> Dict[str, Any]:
         """Abstract method to process the X row for one cell into a Dataset item.
 
         - `cell_joinid`: The cell `soma_joinid`.
@@ -87,7 +86,7 @@ def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> dict[str
 class _DatasetGeneratorPickleHack:
     """SEE: https://github.com/huggingface/datasets/issues/6194."""
 
-    def __init__(self, generator: Any, generator_id: str | None = None) -> None:
+    def __init__(self, generator: Any, generator_id: Optional[str] = None) -> None:
         self.generator = generator
         self.generator_id = generator_id if generator_id is not None else str(uuid.uuid4())
 
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/geneformer_tokenizer.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/geneformer_tokenizer.py
index 7303d3bbf..48ea8fdea 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/geneformer_tokenizer.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/geneformer_tokenizer.py
@@ -1,6 +1,5 @@
 import pickle
-from collections.abc import Sequence
-from typing import Any
+from typing import Any, Dict, List, Optional, Sequence, Set
 
 import numpy as np
 import numpy.typing as npt
@@ -43,7 +42,7 @@ class GeneformerTokenizer(CellDatasetBuilder):
     - and the specified `obs_column_names` (cell metadata from the experiment obs dataframe)
     """
 
-    obs_column_names: set[str]
+    obs_column_names: Set[str]
     max_input_tokens: int
     special_token: bool
 
@@ -55,15 +54,15 @@ class GeneformerTokenizer(CellDatasetBuilder):
     model_gene_map: scipy.sparse.coo_matrix
     model_gene_tokens: npt.NDArray[np.int64]  # Geneformer token for each column of model_gene_map
     model_gene_medians: npt.NDArray[np.float64]  # float for each column of model_gene_map
-    model_cls_token: np.int64 | None = None
-    model_eos_token: np.int64 | None = None
+    model_cls_token: Optional[np.int64] = None
+    model_eos_token: Optional[np.int64] = None
 
     def __init__(
         self,
         experiment: tiledbsoma.Experiment,
         *,
-        obs_column_names: Sequence[str] | None = None,
-        obs_attributes: Sequence[str] | None = None,
+        obs_column_names: Optional[Sequence[str]] = None,
+        obs_attributes: Optional[Sequence[str]] = None,
         max_input_tokens: int = 2048,
         special_token: bool = False,
         token_dictionary_file: str = "",
@@ -148,10 +147,10 @@ def _load_geneformer_data(
         map_data = []
         map_i = []
         map_j = []
-        model_gene_id_by_ensg: dict[str, int] = {}
+        model_gene_id_by_ensg: Dict[str, int] = {}
         model_gene_count = 0
-        model_gene_tokens: list[np.int64] = []
-        model_gene_medians: list[np.float64] = []
+        model_gene_tokens: List[np.int64] = []
+        model_gene_medians: List[np.float64] = []
         for gene_id, row in genes_df.iterrows():
             ensg = row["feature_id"]  # ENSG... gene id, which keys Geneformer's dicts
             if gene_mapping is not None:
@@ -199,7 +198,7 @@ def __enter__(self) -> "GeneformerTokenizer":
         self.obs_df = self.obs(column_names=obs_column_names).concat().to_pandas().set_index("soma_joinid")
         return self
 
-    def cell_item(self, cell_joinid: int, cell_Xrow: scipy.sparse.csr_matrix) -> dict[str, Any]:
+    def cell_item(self, cell_joinid: int, cell_Xrow: scipy.sparse.csr_matrix) -> Dict[str, Any]:
         """Given the expression vector for one cell, compute the Dataset item providing
         the Geneformer inputs (token sequence and metadata).
         """
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py
index 2e12f68b6..5bef673c9 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py
@@ -3,12 +3,11 @@
 import logging
 import os
 import typing
-from collections.abc import Iterator, Sequence
 from contextlib import contextmanager
 from datetime import timedelta
 from math import ceil
 from time import time
-from typing import Any, TypeAlias
+from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 import numpy.typing as npt
@@ -33,13 +32,13 @@
 pytorch_logger = logging.getLogger("cellxgene_census.experimental.pytorch")
 
 # TODO: Rename to reflect the correct order of the Tensors within the tuple: (X, obs)
-ObsAndXDatum = tuple[Tensor, Tensor]
+ObsAndXDatum = Tuple[Tensor, Tensor]
 """Return type of ``ExperimentDataPipe`` that pairs a Tensor of ``obs`` row(s) with a Tensor of ``X`` matrix row(s).
 The Tensors are rank 1 if ``batch_size`` is 1, otherwise the Tensors are rank 2."""
 
 
 # "Chunk" of X data, returned by each `Method` above
-ChunkX: TypeAlias = npt.NDArray[Any] | sparse.csr_matrix
+ChunkX = Union[npt.NDArray[Any], sparse.csr_matrix]
 
 
 @define
@@ -59,7 +58,7 @@ def __len__(self) -> int:
         return len(self.obs)
 
 
-Encoders = dict[str, Encoder]
+Encoders = Dict[str, Encoder]
 """A dictionary of ``Encoder``s keyed by the ``obs`` column name."""
 
 
@@ -98,7 +97,7 @@ def __add__(self, other: "Stats") -> "Stats":
 @contextmanager
 def _open_experiment(
     uri: str,
-    aws_region: str | None = None,
+    aws_region: Optional[str] = None,
 ) -> soma.Experiment:
     """Internal method for opening a SOMA ``Experiment`` as a context manager."""
     context = get_default_soma_context().replace(tiledb_config={"vfs.s3.region": aws_region} if aws_region else {})
@@ -108,8 +107,8 @@ def _open_experiment(
 
 
 def _tables_to_np(
-    tables: Iterator[tuple[Table, Any]], shape: tuple[int, int]
-) -> typing.Generator[tuple[npt.NDArray[Any], Any, int], None, None]:
+    tables: Iterator[Tuple[Table, Any]], shape: Tuple[int, int]
+) -> typing.Generator[Tuple[npt.NDArray[Any], Any, int], None, None]:
     for tbl, indices in tables:
         row_indices, col_indices, data = (x.to_numpy() for x in tbl.columns)
         nnz = len(data)
@@ -136,10 +135,10 @@ def __init__(
         obs: soma.DataFrame,
         X: soma.SparseNDArray,
         obs_column_names: Sequence[str],
-        obs_joinids_chunked: list[npt.NDArray[np.int64]],
+        obs_joinids_chunked: List[npt.NDArray[np.int64]],
         var_joinids: npt.NDArray[np.int64],
-        shuffle_chunk_count: int | None = None,
-        shuffle_rng: Generator | None = None,
+        shuffle_chunk_count: Optional[int] = None,
+        shuffle_rng: Optional[Generator] = None,
         return_sparse_X: bool = False,
     ):
         self.obs = obs
@@ -222,7 +221,7 @@ def __next__(self) -> _SOMAChunk:
         return _SOMAChunk(obs=obs_batch, X=X_batch, stats=stats)
 
 
-def list_split(arr_list: list[Any], sublist_len: int) -> list[list[Any]]:
+def list_split(arr_list: List[Any], sublist_len: int) -> List[List[Any]]:
     """Splits a python list into a list of sublists where each sublist is of size `sublist_len`.
     TODO: Replace with `itertools.batched` when Python 3.12 becomes the minimum supported version.
     """
@@ -239,7 +238,7 @@ def list_split(arr_list: list[Any], sublist_len: int) -> list[list[Any]]:
     return result
 
 
-def run_gc() -> tuple[tuple[Any, Any, Any], tuple[Any, Any, Any], float]:  # noqa: D103
+def run_gc() -> Tuple[Tuple[Any, Any, Any], Tuple[Any, Any, Any], float]:  # noqa: D103
     proc = psutil.Process(os.getpid())
 
     pre_gc = proc.memory_full_info(), psutil.virtual_memory(), psutil.swap_memory()
@@ -267,7 +266,7 @@ class _ObsAndXIterator(Iterator[ObsAndXDatum]):
     soma_chunk_iter: Iterator[_SOMAChunk]
     """The iterator for SOMA chunks of paired obs and X data"""
 
-    soma_chunk: _SOMAChunk | None
+    soma_chunk: Optional[_SOMAChunk]
     """The current SOMA chunk of obs and X data"""
 
     i: int = -1
@@ -278,15 +277,15 @@ def __init__(
         obs: soma.DataFrame,
         X: soma.SparseNDArray,
         obs_column_names: Sequence[str],
-        obs_joinids_chunked: list[npt.NDArray[np.int64]],
+        obs_joinids_chunked: List[npt.NDArray[np.int64]],
         var_joinids: npt.NDArray[np.int64],
         batch_size: int,
-        encoders: list[Encoder],
+        encoders: List[Encoder],
         stats: Stats,
         return_sparse_X: bool,
         use_eager_fetch: bool,
-        shuffle_chunk_count: int | None = None,
-        shuffle_rng: Generator | None = None,
+        shuffle_chunk_count: Optional[int] = None,
+        shuffle_rng: Optional[Generator] = None,
     ) -> None:
         self.soma_chunk_iter = _ObsAndXSOMAIterator(
             obs,
@@ -363,7 +362,7 @@ def __next__(self) -> ObsAndXDatum:
 
         return X_tensor, obs_tensor
 
-    def _read_partial_torch_batch(self, batch_size: int) -> tuple[pd.DataFrame, ChunkX]:
+    def _read_partial_torch_batch(self, batch_size: int) -> Tuple[pd.DataFrame, ChunkX]:
         """Reads a torch-size batch of data from the current SOMA chunk, returning a torch-size batch whose size may
         contain fewer rows than the requested ``batch_size``. This can happen when the remaining rows in the current
         SOMA chunk are fewer than the requested ``batch_size``.
@@ -444,15 +443,15 @@ class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsAndXDatum]]):  # type: ig
 
     _initialized: bool
 
-    _obs_joinids: npt.NDArray[np.int64] | None
+    _obs_joinids: Optional[npt.NDArray[np.int64]]
 
-    _var_joinids: npt.NDArray[np.int64] | None
+    _var_joinids: Optional[npt.NDArray[np.int64]]
 
-    _encoders: list[Encoder]
+    _encoders: List[Encoder]
 
     _stats: Stats
 
-    _shuffle_rng: Generator | None
+    _shuffle_rng: Optional[Generator]
 
     # TODO: Consider adding another convenience method wrapper to construct this object whose signature is more closely
     #  aligned with get_anndata() params (i.e. "exploded" AxisQuery params).
@@ -461,17 +460,17 @@ def __init__(
         experiment: soma.Experiment,
         measurement_name: str = "RNA",
         X_name: str = "raw",
-        obs_query: soma.AxisQuery | None = None,
-        var_query: soma.AxisQuery | None = None,
+        obs_query: Optional[soma.AxisQuery] = None,
+        var_query: Optional[soma.AxisQuery] = None,
         obs_column_names: Sequence[str] = (),
         batch_size: int = 1,
         shuffle: bool = True,
-        seed: int | None = None,
+        seed: Optional[int] = None,
         return_sparse_X: bool = False,
-        soma_chunk_size: int | None = 64,
+        soma_chunk_size: Optional[int] = 64,
         use_eager_fetch: bool = True,
-        shuffle_chunk_count: int | None = 2000,
-        encoders: list[Encoder] | None = None,
+        encoders: Optional[List[Encoder]] = None,
+        shuffle_chunk_count: Optional[int] = 2000,
     ) -> None:
         r"""Construct a new ``ExperimentDataPipe``.
 
@@ -597,10 +596,10 @@ def _init(self) -> None:
 
     @staticmethod
     def _subset_ids_to_partition(
-        ids_chunked: list[npt.NDArray[np.int64]],
+        ids_chunked: List[npt.NDArray[np.int64]],
         partition_index: int,
         num_partitions: int,
-    ) -> list[npt.NDArray[np.int64]]:
+    ) -> List[npt.NDArray[np.int64]]:
         """Returns a single partition of the obs_joinids_chunked (a 2D ndarray), based upon the current process's distributed rank and world
         size.
         """
@@ -623,7 +622,7 @@ def _compute_partitions(
         loader_partitions: int,
         dist_partition: int,
         num_dist_partitions: int,
-    ) -> tuple[int, int]:
+    ) -> Tuple[int, int]:
         # NOTE: Can alternately use a `worker_init_fn` to split among workers split workload
         total_partitions = num_dist_partitions * loader_partitions
         partition = dist_partition * loader_partitions + loader_partition
@@ -666,7 +665,7 @@ def __iter__(self) -> Iterator[ObsAndXDatum]:
             dist_partition=dist.get_rank() if dist.is_initialized() else 0,
             num_dist_partitions=dist.get_world_size() if dist.is_initialized() else 1,
         )
-        obs_joinids_chunked_partition: list[npt.NDArray[np.int64]] = self._subset_ids_to_partition(
+        obs_joinids_chunked_partition: List[npt.NDArray[np.int64]] = self._subset_ids_to_partition(
             obs_joinids_chunked, partition, partitions
         )
 
@@ -694,7 +693,7 @@ def __iter__(self) -> Iterator[ObsAndXDatum]:
             )
 
     @staticmethod
-    def _chunk_ids(ids: npt.NDArray[np.int64], chunk_size: int) -> list[npt.NDArray[np.int64]]:
+    def _chunk_ids(ids: npt.NDArray[np.int64], chunk_size: int) -> List[npt.NDArray[np.int64]]:
         num_chunks = max(1, ceil(len(ids) / chunk_size))
         pytorch_logger.debug(f"Shuffling {len(ids)} obs joinids into {num_chunks} chunks of {chunk_size}")
         return np.array_split(ids, num_chunks)
@@ -709,7 +708,7 @@ def __len__(self) -> int:
     def __getitem__(self, index: int) -> ObsAndXDatum:
         raise NotImplementedError("IterDataPipe can only be iterated")
 
-    def _build_obs_encoders(self, query: soma.ExperimentAxisQuery) -> list[Encoder]:
+    def _build_obs_encoders(self, query: soma.ExperimentAxisQuery) -> List[Encoder]:
         pytorch_logger.debug("Initializing encoders")
 
         encoders = []
@@ -749,7 +748,7 @@ def stats(self) -> Stats:
         return self._stats
 
     @property
-    def shape(self) -> tuple[int, int]:
+    def shape(self) -> Tuple[int, int]:
         """Get the shape of the data that will be returned by this :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
         This is the number of obs (cell) and var (feature) counts in the returned data. If used in multiprocessing mode
         (i.e. :class:`torch.utils.data.DataLoader` instantiated with num_workers > 0), the obs (cell) count will reflect
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py
index 188513c65..c47ad9f1e 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py
@@ -1,14 +1,14 @@
 from __future__ import annotations
 
 import os
-from collections.abc import Callable, Sequence
 from concurrent import futures
-from typing import Any, Literal, cast
+from typing import Any, Callable, Sequence, cast
 
 import numpy as np
 import pandas as pd
 import tiledbsoma as soma
 from somacore.options import SparseDFCoord
+from typing_extensions import Literal
 
 from ..._experiment import _get_experiment
 from ..util._eager_iter import _EagerIterator
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_online.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_online.py
index 13bbe76c5..2eaf71a7b 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_online.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_online.py
@@ -1,3 +1,5 @@
+from typing import Optional, Tuple
+
 import numba
 import numpy as np
 import numpy.typing as npt
@@ -41,7 +43,7 @@ def update(
         self,
         var_vec: npt.NDArray[np.int64],
         val_vec: npt.NDArray[np.float32],
-        batch_vec: npt.NDArray[np.int64] | None = None,
+        batch_vec: Optional[npt.NDArray[np.int64]] = None,
     ) -> None:
         if self.n_batches == 1:
             assert batch_vec is None
@@ -52,7 +54,7 @@ def update(
 
     def finalize(
         self,
-    ) -> tuple[
+    ) -> Tuple[
         npt.NDArray[np.float64],
         npt.NDArray[np.float64],
         npt.NDArray[np.float64],
@@ -123,7 +125,7 @@ def update(
         self,
         var_vec: npt.NDArray[np.int64],
         val_vec: npt.NDArray[np.float32],
-        batch_vec: npt.NDArray[np.int64] | None = None,
+        batch_vec: Optional[npt.NDArray[np.int64]] = None,
     ) -> None:
         if self.n_batches == 1:
             assert batch_vec is None
@@ -145,7 +147,7 @@ def update(
                 self.clip_val,
             )
 
-    def finalize(self) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+    def finalize(self) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
         return self.counts_sum, self.squared_counts_sum
 
 
@@ -280,7 +282,7 @@ def _mbomv_combine_batches(
     n_samples: npt.NDArray[np.int64],
     u: npt.NDArray[np.float64],
     M2: npt.NDArray[np.float64],
-) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
     """Combine all batches using Chan's parallel adaptation of Welford's.
 
     Returns tuple of (u, M2).
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py
index 8cef3e24e..a5930525a 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py
@@ -1,8 +1,7 @@
 from __future__ import annotations
 
-from collections.abc import Generator
 from concurrent import futures
-from typing import Any
+from typing import Any, Generator
 
 import numpy as np
 import numpy.typing as npt
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/util/_eager_iter.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/util/_eager_iter.py
index 9c229b889..6ee5db37a 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/util/_eager_iter.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/util/_eager_iter.py
@@ -1,10 +1,9 @@
 import logging
 import threading
 from collections import deque
-from collections.abc import Iterator
 from concurrent import futures
 from concurrent.futures import Future
-from typing import TypeVar
+from typing import Deque, Iterator, Optional, TypeVar
 
 util_logger = logging.getLogger("cellxgene_census.experimental.util")
 
@@ -15,13 +14,13 @@ class _EagerIterator(Iterator[_T]):
     def __init__(
         self,
         iterator: Iterator[_T],
-        pool: futures.Executor | None = None,
+        pool: Optional[futures.Executor] = None,
     ):
         super().__init__()
         self.iterator = iterator
         self._pool = pool or futures.ThreadPoolExecutor()
         self._own_pool = pool is None
-        self._future: Future[_T] | None = None
+        self._future: Optional[Future[_T]] = None
         self._begin_next()
 
     def _begin_next(self) -> None:
@@ -57,14 +56,14 @@ def __init__(
         self,
         iterator: Iterator[_T],
         max_pending: int = 1,
-        pool: futures.Executor | None = None,
+        pool: Optional[futures.Executor] = None,
     ):
         super().__init__()
         self.iterator = iterator
         self.max_pending = max_pending
         self._pool = pool or futures.ThreadPoolExecutor()
         self._own_pool = pool is None
-        self._pending_results: deque[futures.Future[_T]] = deque()
+        self._pending_results: Deque[futures.Future[_T]] = deque()
         self._lock = threading.Lock()
         self._begin_next()
 
diff --git a/api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py b/api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py
index e76b87508..3490c4bbd 100644
--- a/api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py
+++ b/api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py
@@ -1,5 +1,6 @@
 import pathlib
-from collections.abc import Callable, Sequence
+import sys
+from typing import Callable, List, Optional, Sequence, Union
 from unittest.mock import patch
 
 import numpy as np
@@ -49,17 +50,17 @@ def pytorch_seq_x_value_gen(obs_range: range, var_range: range) -> spmatrix:
 
 
 @pytest.fixture
-def X_layer_names() -> list[str]:
+def X_layer_names() -> List[str]:
     return ["raw"]
 
 
 @pytest.fixture
-def obsp_layer_names() -> list[str] | None:
+def obsp_layer_names() -> Optional[List[str]]:
     return None
 
 
 @pytest.fixture
-def varp_layer_names() -> list[str] | None:
+def varp_layer_names() -> Optional[List[str]]:
     return None
 
 
@@ -101,8 +102,8 @@ def add_sparse_array(
 @pytest.fixture(scope="function")
 def soma_experiment(
     tmp_path: pathlib.Path,
-    obs_range: int | range,
-    var_range: int | range,
+    obs_range: Union[int, range],
+    var_range: Union[int, range],
     X_value_gen: Callable[[range, range], sparse.spmatrix],
     obsp_layer_names: Sequence[str],
     varp_layer_names: Sequence[str],
@@ -484,6 +485,10 @@ def test_custom_encoders_fail_if_columns_defined(soma_experiment: Experiment) ->
 
 
 @pytest.mark.experimental
+@pytest.mark.skipif(
+    (sys.version_info.major, sys.version_info.minor) == (3, 9),
+    reason="fails intermittently with OOM error for 3.9",
+)
 # noinspection PyTestParametrized
 @pytest.mark.parametrize("obs_range,var_range,X_value_gen", [(6, 3, pytorch_x_value_gen)])
 def test_multiprocessing__returns_full_result(soma_experiment: Experiment) -> None:
@@ -515,11 +520,11 @@ def test_distributed__returns_data_partition_for_rank(
     """Tests pytorch._partition_obs_joinids() behavior in a simulated PyTorch distributed processing mode,
     using mocks to avoid having to do real PyTorch distributed setup."""
 
-    with (
-        patch("cellxgene_census.experimental.ml.pytorch.dist.is_initialized") as mock_dist_is_initialized,
-        patch("cellxgene_census.experimental.ml.pytorch.dist.get_rank") as mock_dist_get_rank,
-        patch("cellxgene_census.experimental.ml.pytorch.dist.get_world_size") as mock_dist_get_world_size,
-    ):
+    with patch("cellxgene_census.experimental.ml.pytorch.dist.is_initialized") as mock_dist_is_initialized, patch(
+        "cellxgene_census.experimental.ml.pytorch.dist.get_rank"
+    ) as mock_dist_get_rank, patch(
+        "cellxgene_census.experimental.ml.pytorch.dist.get_world_size"
+    ) as mock_dist_get_world_size:
         mock_dist_is_initialized.return_value = True
         mock_dist_get_rank.return_value = 1
         mock_dist_get_world_size.return_value = 3
@@ -551,12 +556,13 @@ def test_distributed_and_multiprocessing__returns_data_partition_for_rank(
     DataLoader multiprocessing mode, using mocks to avoid having to do distributed pytorch
     setup or real DataLoader multiprocessing."""
 
-    with (
-        patch("torch.utils.data.get_worker_info") as mock_get_worker_info,
-        patch("cellxgene_census.experimental.ml.pytorch.dist.is_initialized") as mock_dist_is_initialized,
-        patch("cellxgene_census.experimental.ml.pytorch.dist.get_rank") as mock_dist_get_rank,
-        patch("cellxgene_census.experimental.ml.pytorch.dist.get_world_size") as mock_dist_get_world_size,
-    ):
+    with patch("torch.utils.data.get_worker_info") as mock_get_worker_info, patch(
+        "cellxgene_census.experimental.ml.pytorch.dist.is_initialized"
+    ) as mock_dist_is_initialized, patch(
+        "cellxgene_census.experimental.ml.pytorch.dist.get_rank"
+    ) as mock_dist_get_rank, patch(
+        "cellxgene_census.experimental.ml.pytorch.dist.get_world_size"
+    ) as mock_dist_get_world_size:
         mock_get_worker_info.return_value = WorkerInfo(id=1, num_workers=2, seed=1234)
         mock_dist_is_initialized.return_value = True
         mock_dist_get_rank.return_value = 1
diff --git a/api/python/cellxgene_census/tests/experimental/pp/test_stats.py b/api/python/cellxgene_census/tests/experimental/pp/test_stats.py
index ecc410d0e..3c113ea07 100644
--- a/api/python/cellxgene_census/tests/experimental/pp/test_stats.py
+++ b/api/python/cellxgene_census/tests/experimental/pp/test_stats.py
@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any, Tuple, Union
 
 import numpy as np
 import numpy.ma as ma
@@ -11,7 +11,7 @@
 from cellxgene_census.experimental import pp
 
 
-def var(X: sparse.csc_matrix | sparse.csr_matrix, axis: int = 0, ddof: int = 1) -> Any:
+def var(X: Union[sparse.csc_matrix, sparse.csr_matrix], axis: int = 0, ddof: int = 1) -> Any:
     """
     Variance of a sparse matrix calculated as mean(X**2) - mean(X)**2
     with Bessel's correction applied for unbiased estimate
@@ -52,7 +52,7 @@ def test_mean_variance(
     calc_mean: bool,
     calc_variance: bool,
     small_mem_context: soma.SOMATileDBContext,
-    obs_coords: tuple[None, slice],
+    obs_coords: Tuple[None, slice],
 ) -> None:
     with cellxgene_census.open_soma(census_version="latest", context=small_mem_context) as census:
         with census["census_data"][experiment_name].axis_query(
@@ -119,7 +119,7 @@ def test_mean_variance_nnz_only(
     calc_mean: bool,
     calc_variance: bool,
     small_mem_context: soma.SOMATileDBContext,
-    obs_coords: tuple[None, slice],
+    obs_coords: Tuple[None, slice],
 ) -> None:
     # Note: since this test requires materializing the matrix in memory to compute the mean/variance,
     # we're going to use a coord slice based approach. This will ensure the matrix can fit in memory.
diff --git a/api/python/cellxgene_census/tests/experimental/test_embeddings_search.py b/api/python/cellxgene_census/tests/experimental/test_embeddings_search.py
index a58a31628..b3c0f6f77 100644
--- a/api/python/cellxgene_census/tests/experimental/test_embeddings_search.py
+++ b/api/python/cellxgene_census/tests/experimental/test_embeddings_search.py
@@ -1,5 +1,5 @@
 import json
-from typing import Any
+from typing import Any, Dict, List
 
 import anndata as ad
 import numpy as np
@@ -15,7 +15,7 @@
 
 @pytest.mark.experimental
 @pytest.mark.live_corpus
-def test_embeddings_search(true_neighbors: dict[str, Any], query_result: NeighborObs) -> None:
+def test_embeddings_search(true_neighbors: Dict[str, Any], query_result: NeighborObs) -> None:
     # check result shapes
     rslt = query_result
     assert isinstance(rslt.neighbor_ids, np.ndarray)
@@ -96,7 +96,7 @@ def test_predict_obs_metadata(query_anndata: ad.AnnData, query_result: NeighborO
 
 
 @pytest.fixture(scope="module")
-def true_neighbors() -> dict[int, list[dict[str, Any]]]:
+def true_neighbors() -> Dict[int, List[Dict[str, Any]]]:
     ans = {}
     for line in TRUE_NEAREST_NEIGHBORS_JSON.strip().split("\n"):
         example = json.loads(line)
@@ -105,7 +105,7 @@ def true_neighbors() -> dict[int, list[dict[str, Any]]]:
 
 
 @pytest.fixture(scope="module")
-def query_anndata(true_neighbors: dict[str, Any]) -> ad.AnnData:
+def query_anndata(true_neighbors: Dict[str, Any]) -> ad.AnnData:
     with cellxgene_census.open_soma(census_version=TRUE_NEAREST_NEIGHBORS_CENSUS_VERSION) as census:
         return cellxgene_census.get_anndata(
             census,
diff --git a/api/python/cellxgene_census/tests/test_acceptance.py b/api/python/cellxgene_census/tests/test_acceptance.py
index bd01b840b..d4587e03e 100644
--- a/api/python/cellxgene_census/tests/test_acceptance.py
+++ b/api/python/cellxgene_census/tests/test_acceptance.py
@@ -11,8 +11,7 @@
 See README.md for historical data.
 """
 
-from collections.abc import Iterator
-from typing import Any
+from typing import Any, Dict, Iterator, Optional, Tuple
 
 import pyarrow as pa
 import pytest
@@ -22,7 +21,7 @@
 from cellxgene_census._open import DEFAULT_TILEDB_CONFIGURATION
 
 
-def make_context(census_version: str, config: dict[str, Any] | None = None) -> soma.SOMATileDBContext:
+def make_context(census_version: str, config: Optional[Dict[str, Any]] = None) -> soma.SOMATileDBContext:
     config = config or {}
     version = cellxgene_census.get_census_version_description(census_version)
     s3_region = version["soma"].get("s3_region", "us-west-2")
@@ -52,7 +51,7 @@ def test_load_axes(organism: str) -> None:
     del var_df
 
 
-def table_iter_is_ok(tbl_iter: Iterator[pa.Table], stop_after: int | None = 2) -> bool:
+def table_iter_is_ok(tbl_iter: Iterator[pa.Table], stop_after: Optional[int] = 2) -> bool:
     """
     Utility that verifies that the value is an iterator of pa.Table.
 
@@ -79,7 +78,7 @@ def table_iter_is_ok(tbl_iter: Iterator[pa.Table], stop_after: int | None = 2) -
         pytest.param(None, DEFAULT_TILEDB_CONFIGURATION, marks=pytest.mark.expensive),
     ],
 )
-def test_incremental_read_obs(organism: str, stop_after: int | None, ctx_config: dict[str, Any] | None) -> None:
+def test_incremental_read_obs(organism: str, stop_after: Optional[int], ctx_config: Optional[Dict[str, Any]]) -> None:
     """Verify that obs, var and X[raw] can be read incrementally, i.e., in chunks"""
 
     # ctx_config=None open census with a small (default) TileDB buffer size, which reduces
@@ -102,7 +101,7 @@ def test_incremental_read_obs(organism: str, stop_after: int | None, ctx_config:
         pytest.param(None, DEFAULT_TILEDB_CONFIGURATION, marks=pytest.mark.expensive),
     ],
 )
-def test_incremental_read_var(organism: str, stop_after: int | None, ctx_config: dict[str, Any] | None) -> None:
+def test_incremental_read_var(organism: str, stop_after: Optional[int], ctx_config: Optional[Dict[str, Any]]) -> None:
     """Verify that var can be read incrementally, i.e., in chunks"""
 
     # ctx_config=None open census with a small (default) TileDB buffer size, which reduces
@@ -144,9 +143,9 @@ def test_incremental_read_var(organism: str, stop_after: int | None, ctx_config:
 )
 def test_incremental_read_X(
     organism: str,
-    stop_after: int | None,
-    ctx_config: dict[str, Any] | None,
-    coords: tuple[slice, slice] | None,
+    stop_after: Optional[int],
+    ctx_config: Optional[Dict[str, Any]],
+    coords: Optional[Tuple[slice, slice]],
 ) -> None:
     """Verify that obs, var and X[raw] can be read incrementally, i.e., in chunks"""
 
@@ -166,7 +165,7 @@ def test_incremental_read_X(
     ["tissue=='aorta'", pytest.param("tissue=='brain'", marks=pytest.mark.expensive)],
 )
 @pytest.mark.parametrize("stop_after", [2, pytest.param(None, marks=pytest.mark.expensive)])
-def test_incremental_query(organism: str, obs_value_filter: str, stop_after: int | None) -> None:
+def test_incremental_query(organism: str, obs_value_filter: str, stop_after: Optional[int]) -> None:
     """Verify incremental read of query result."""
     # use default TileDB configuration
     with cellxgene_census.open_soma(census_version="latest") as census:
@@ -261,9 +260,9 @@ def test_incremental_query(organism: str, obs_value_filter: str, stop_after: int
 )
 def test_get_anndata(
     organism: str,
-    obs_value_filter: str | None,
-    obs_coords: slice | None,
-    ctx_config: dict[str, Any] | None,
+    obs_value_filter: Optional[str],
+    obs_coords: Optional[slice],
+    ctx_config: Optional[Dict[str, Any]],
 ) -> None:
     """Verify query and read into AnnData"""
     ctx_config = ctx_config or {}
diff --git a/api/python/cellxgene_census/tests/test_get_anndata.py b/api/python/cellxgene_census/tests/test_get_anndata.py
index f3161cca8..fb3375f97 100644
--- a/api/python/cellxgene_census/tests/test_get_anndata.py
+++ b/api/python/cellxgene_census/tests/test_get_anndata.py
@@ -1,4 +1,4 @@
-from typing import Any, Literal
+from typing import Any, Dict, List, Literal
 
 import numpy as np
 import pandas as pd
@@ -83,7 +83,7 @@ def test_get_anndata_x_layer(census: soma.Collection, layer: str) -> None:
 
 @pytest.mark.live_corpus
 @pytest.mark.parametrize("layers", [["raw", "normalized"], ["normalized", "raw"]])
-def test_get_anndata_two_layers(census: soma.Collection, layers: list[str]) -> None:
+def test_get_anndata_two_layers(census: soma.Collection, layers: List[str]) -> None:
     ad_primary_layer_in_X = cellxgene_census.get_anndata(
         census,
         organism="Homo sapiens",
@@ -165,7 +165,7 @@ def test_get_anndata_obsm_one_layer(dec_lts_census: soma.Collection, obsm_layer:
 
 @pytest.mark.live_corpus
 @pytest.mark.parametrize("obsm_layers", [["scvi", "geneformer"]])
-def test_get_anndata_obsm_two_layers(dec_lts_census: soma.Collection, obsm_layers: list[str]) -> None:
+def test_get_anndata_obsm_two_layers(dec_lts_census: soma.Collection, obsm_layers: List[str]) -> None:
     # NOTE: This test only works on the 2023-12-15 LTS Census, since in newer releases
     # the embeddings aren't distributed via the `obsm_layer` parameter.
     ad = cellxgene_census.get_anndata(
@@ -184,10 +184,8 @@ def test_get_anndata_obsm_two_layers(dec_lts_census: soma.Collection, obsm_layer
 
 
 @pytest.mark.live_corpus
-@pytest.mark.parametrize("obs_embeddings", [["scvi", "geneformer"]])
-def test_get_anndata_obs_embeddings(lts_census: soma.Collection, obs_embeddings: list[str]) -> None:
-    # NOTE: when the next LTS gets released (>2023-12-15), embeddings may or may not be available,
-    # so this test could require adjustments.
+@pytest.mark.parametrize("obs_embeddings", [["scvi", "scgpt"]])
+def test_get_anndata_obs_embeddings(lts_census: soma.Collection, obs_embeddings: List[str]) -> None:
     ad = cellxgene_census.get_anndata(
         lts_census,
         organism="Homo sapiens",
@@ -206,7 +204,7 @@ def test_get_anndata_obs_embeddings(lts_census: soma.Collection, obs_embeddings:
 
 @pytest.mark.live_corpus
 @pytest.mark.parametrize("var_embeddings", [["nmf"]])
-def test_get_anndata_var_embeddings(dec_lts_census: soma.Collection, var_embeddings: list[str]) -> None:
+def test_get_anndata_var_embeddings(dec_lts_census: soma.Collection, var_embeddings: List[str]) -> None:
     # NOTE: this test only works on the 2023-12-15 LTS Census, since var embeddings
     # aren't available in the newer releases.
 
@@ -301,7 +299,7 @@ def test_deprecated_column_api(census: soma.Collection) -> None:
     pd.testing.assert_frame_equal(ad_curr.var, ad_prev.var)
 
 
-def _map_to_get_anndata_args(query: dict[str, Any], axis: Literal["obs", "var"]) -> dict[str, Any]:
+def _map_to_get_anndata_args(query: Dict[str, Any], axis: Literal["obs", "var"]) -> Dict[str, Any]:
     """Helper to map arguments of get_obs/ get_var to get_anndata."""
     result = {}
     if "coords" in query:
@@ -334,7 +332,7 @@ def _map_to_get_anndata_args(query: dict[str, Any], axis: Literal["obs", "var"])
         pytest.param({"value_filter": "tissue_general == 'vasculature'"}, id="value_filter"),
     ],
 )
-def test_get_obs(lts_census: soma.Collection, query: dict[str, Any]) -> None:
+def test_get_obs(lts_census: soma.Collection, query: Dict[str, Any]) -> None:
     adata_obs = cellxgene_census.get_anndata(
         lts_census, organism="Mus musculus", **_map_to_get_anndata_args(query, "obs")
     ).obs
@@ -360,7 +358,7 @@ def test_get_obs(lts_census: soma.Collection, query: dict[str, Any]) -> None:
         pytest.param({"value_filter": "feature_name in ['Gm53058', '0610010K14Rik']"}, id="value_filter"),
     ],
 )
-def test_get_var(lts_census: soma.Collection, query: dict[str, Any]) -> None:
+def test_get_var(lts_census: soma.Collection, query: Dict[str, Any]) -> None:
     adata_var = cellxgene_census.get_anndata(
         lts_census, organism="Mus musculus", obs_coords=slice(0), **_map_to_get_anndata_args(query, "var")
     ).var
diff --git a/api/python/cellxgene_census/tests/test_lts_compat.py b/api/python/cellxgene_census/tests/test_lts_compat.py
index 2c486d541..dbe646cdd 100644
--- a/api/python/cellxgene_census/tests/test_lts_compat.py
+++ b/api/python/cellxgene_census/tests/test_lts_compat.py
@@ -9,8 +9,7 @@
 from __future__ import annotations
 
 from collections import deque
-from collections.abc import Iterator, Sequence
-from typing import Literal, TypeAlias, get_args
+from typing import Iterator, Literal, Sequence, Union, get_args
 
 import pyarrow as pa
 import pytest
@@ -28,9 +27,14 @@
 ]
 CollectionTypeNames = ["SOMACollection", "SOMAExperiment", "SOMAMeasurement"]
 
-SOMATypes: TypeAlias = (
-    soma.Collection | soma.DataFrame | soma.SparseNDArray | soma.DenseNDArray | soma.Experiment | soma.Measurement
-)
+SOMATypes = Union[
+    soma.Collection,
+    soma.DataFrame,
+    soma.SparseNDArray,
+    soma.DenseNDArray,
+    soma.Experiment,
+    soma.Measurement,
+]
 
 
 def walk_census(
diff --git a/api/python/cellxgene_census/tests/test_open.py b/api/python/cellxgene_census/tests/test_open.py
index 5945ea9e4..df20b3337 100644
--- a/api/python/cellxgene_census/tests/test_open.py
+++ b/api/python/cellxgene_census/tests/test_open.py
@@ -442,8 +442,8 @@ def test_opening_census_without_anon_access_fails_with_bogus_creds() -> None:
     os.environ["AWS_SECRET_ACCESS_KEY"] = "fake_key"
     # Passing an empty context
     with pytest.raises(
-        (tiledb.TileDBError, soma.DoesNotExistError),
-        match=r"does not exist",
+        tiledb.TileDBError,
+        match=r"The AWS Access Key Id you provided does not exist in our records",
     ):
         cellxgene_census.open_soma(census_version="latest", context=soma.SOMATileDBContext())
 
diff --git a/api/python/cellxgene_census/tests/test_user_agent.py b/api/python/cellxgene_census/tests/test_user_agent.py
index 41612c649..dc410df9a 100644
--- a/api/python/cellxgene_census/tests/test_user_agent.py
+++ b/api/python/cellxgene_census/tests/test_user_agent.py
@@ -3,10 +3,9 @@
 
 import json
 import os
-from collections.abc import Callable
 from functools import partial
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Callable
 
 import numpy as np
 import proxy
diff --git a/api/python/notebooks/README.md b/api/python/notebooks/README.md
index cdf89656d..2b1c683ef 100644
--- a/api/python/notebooks/README.md
+++ b/api/python/notebooks/README.md
@@ -9,7 +9,7 @@ Demonstration notebooks for the CZ CELLxGENE Discover Census. There are two kind
 
 You must be on a Linux or MacOS system, with the following installed:
 
-* Python 3.10 to 3.12
+* Python 3.8 to 3.11
 * Jupyter or some other means of running notebooks (e.g., vscode)
 
 For now, it is recommended that you do all this on a host with sufficient memory,
diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index 0cfbd969b..4654eb37a 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -4,7 +4,7 @@
 
 The Census API requires a Linux or MacOS system with:
 
-- Python 3.10 to Python 3.12. Or R, supported versions TBD.
+- Python 3.8 to Python 3.11. Or R, supported versions TBD.
 - Recommended: >16 GB of memory.
 - Recommended: >5 Mbps internet connection.
 - Recommended: for increased performance use the API through a AWS-EC2 instance from the region `us-west-2`. The Census data builds are hosted in a AWS-S3 bucket in that region.