From 5c0c30d42716fb860d21245730b0d1cc6bdf2e3f Mon Sep 17 00:00:00 2001 From: nguyenv Date: Wed, 24 Jul 2024 13:39:52 -0500 Subject: [PATCH] Drop unused categories in `ExperimentAxisQuery.to_anndata` (#204) Co-authored-by: John Kerl --- python-spec/src/somacore/query/query.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/python-spec/src/somacore/query/query.py b/python-spec/src/somacore/query/query.py index c938ec76..bbd15604 100644 --- a/python-spec/src/somacore/query/query.py +++ b/python-spec/src/somacore/query/query.py @@ -277,6 +277,7 @@ def to_anndata( obsp_layers: Sequence[str] = (), varm_layers: Sequence[str] = (), varp_layers: Sequence[str] = (), + drop_levels: bool = False, ) -> anndata.AnnData: """ Executes the query and return result as an ``AnnData`` in-memory object. @@ -295,10 +296,14 @@ def to_anndata( Additional varm layers to read and return in the varm slot. varp_layers: Additional varp layers to read and return in the varp slot. + drop_levels: + Indicate whether unused categories on axis frames should be + dropped. By default, False, the categories which are present in the SOMA Experiment + and not present in the query output are not dropped. Lifecycle: maturing """ - return self._read( + ad = self._read( X_name, column_names=column_names or AxisColumnNames(obs=None, var=None), X_layers=X_layers, @@ -308,6 +313,17 @@ def to_anndata( varp_layers=varp_layers, ).to_anndata() + # Drop unused categories on axis dataframes if requested + if drop_levels: + for name in ad.obs: + if pd.api.types.is_categorical_dtype(ad.obs[name]): + ad.obs[name] = ad.obs[name].cat.remove_unused_categories() + for name in ad.var: + if pd.api.types.is_categorical_dtype(ad.var[name]): + ad.var[name] = ad.var[name].cat.remove_unused_categories() + + return ad + # Context management def close(self) -> None: