From 767c0b07969b5dfeb698b8b7c84f3b377ba8b6f2 Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Fri, 14 Jun 2024 07:23:24 -0500 Subject: [PATCH] [python] [NO MERGE UNTIL 1.14] Remove tiledb-py dependency --- apis/python/setup.py | 2 +- apis/python/src/tiledbsoma/_measurement.py | 2 + apis/python/src/tiledbsoma/_tdb_handles.py | 5 +- .../_registration/ambient_label_mappings.py | 6 +- apis/python/src/tiledbsoma/io/_util.py | 10 +- apis/python/src/tiledbsoma/io/ingest.py | 67 +++++------ .../options/_soma_tiledb_context.py | 80 ++----------- .../options/_tiledb_create_write_options.py | 105 ++++-------------- apis/python/src/tiledbsoma/pytiledbsoma.cc | 2 + apis/python/src/tiledbsoma/soma_array.cc | 15 +++ apis/python/src/tiledbsoma/soma_collection.cc | 8 -- apis/python/src/tiledbsoma/vfs.cc | 59 ++++++++++ apis/python/tests/test_basic_anndata_io.py | 13 ++- apis/python/tests/test_context.py | 44 ++------ apis/python/tests/test_dataframe.py | 71 +++++++----- .../tests/test_dataframe_index_columns.py | 3 +- apis/python/tests/test_dense_nd_array.py | 32 ++++-- apis/python/tests/test_experiment_query.py | 12 +- apis/python/tests/test_factory.py | 12 +- apis/python/tests/test_platform_config.py | 3 +- apis/python/tests/test_sparse_nd_array.py | 54 +++++---- apis/python/tests/test_util_tiledb.py | 9 +- libtiledbsoma/src/utils/arrow_adapter.cc | 10 +- libtiledbsoma/src/utils/arrow_adapter.h | 4 +- 24 files changed, 303 insertions(+), 325 deletions(-) create mode 100644 apis/python/src/tiledbsoma/vfs.cc diff --git a/apis/python/setup.py b/apis/python/setup.py index ba1a88cd67..a251a1c842 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -304,6 +304,7 @@ def run(self): "src/tiledbsoma/common.cc", "src/tiledbsoma/reindexer.cc", "src/tiledbsoma/query_condition.cc", + "src/tiledbsoma/vfs.cc", "src/tiledbsoma/soma_context.cc", "src/tiledbsoma/soma_array.cc", "src/tiledbsoma/soma_object.cc", @@ -343,7 +344,6 @@ def run(self): "scipy", # Note: the somacore version is in .pre-commit-config.yaml too "somacore==1.0.14", - "tiledb~=0.31.0", "typing-extensions", # Note "-" even though `import typing_extensions` ], extras_require={ diff --git a/apis/python/src/tiledbsoma/_measurement.py b/apis/python/src/tiledbsoma/_measurement.py index 8f0ae08087..49681ab877 100644 --- a/apis/python/src/tiledbsoma/_measurement.py +++ b/apis/python/src/tiledbsoma/_measurement.py @@ -73,6 +73,8 @@ class Measurement( # type: ignore[misc] # __eq__ false positive __slots__ = () _wrapper_type = _tdb_handles.MeasurementWrapper + _wrapper_type = _tdb_handles.MeasurementWrapper + _subclass_constrained_soma_types = { "var": ("SOMADataFrame",), "X": ("SOMACollection",), diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index 9573dbf40f..5b2e163521 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -338,7 +338,6 @@ def _opener( timestamp: int, ) -> clib.SOMAArray: open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write - return cls._ARRAY_WRAPPED_TYPE.open( uri, mode=open_mode, @@ -361,6 +360,10 @@ def _do_initial_reads(self, reader: RawHandle) -> None: def schema(self) -> pa.Schema: return self._handle.schema + @property + def config_options(self) -> clib.PlatformConfig: + return self._handle.config_options + @property def meta(self) -> "MetadataWrapper": return self.metadata diff --git a/apis/python/src/tiledbsoma/io/_registration/ambient_label_mappings.py b/apis/python/src/tiledbsoma/io/_registration/ambient_label_mappings.py index b391ddbc74..0fe9eac925 100644 --- a/apis/python/src/tiledbsoma/io/_registration/ambient_label_mappings.py +++ b/apis/python/src/tiledbsoma/io/_registration/ambient_label_mappings.py @@ -189,8 +189,7 @@ def from_isolated_h5ad( experiment, not in append mode, but allowing us to still have the bulk of the ingestor code to be non-duplicated between non-append mode and append mode. """ - tiledb_ctx = None if context is None else context.tiledb_ctx - with read_h5ad(h5ad_file_name, mode="r", ctx=tiledb_ctx) as adata: + with read_h5ad(h5ad_file_name, mode="r", ctx=context) as adata: return cls.from_isolated_anndata( adata, measurement_name=measurement_name, @@ -434,8 +433,7 @@ def from_h5ad_append_on_experiment( """Extends registration data to one more H5AD input file.""" tiledbsoma.logging.logger.info(f"Registration: registering {h5ad_file_name}.") - tiledb_ctx = None if context is None else context.tiledb_ctx - with read_h5ad(h5ad_file_name, mode="r", ctx=tiledb_ctx) as adata: + with read_h5ad(h5ad_file_name, mode="r", ctx=context) as adata: return cls.from_anndata_append_on_experiment( adata, previous, diff --git a/apis/python/src/tiledbsoma/io/_util.py b/apis/python/src/tiledbsoma/io/_util.py index 3376b3be81..720f013ebf 100644 --- a/apis/python/src/tiledbsoma/io/_util.py +++ b/apis/python/src/tiledbsoma/io/_util.py @@ -16,10 +16,10 @@ import pyarrow as pa from anndata._core import file_backing -import tiledb - +from .. import pytiledbsoma as clib from .._exception import SOMAError from .._types import Path +from ..options import SOMATileDBContext _pa_type_to_str_fmt = { pa.string(): "U", @@ -42,12 +42,14 @@ @contextmanager def read_h5ad( - input_path: Path, *, mode: str = "r", ctx: Optional[tiledb.Ctx] = None + input_path: Path, *, mode: str = "r", ctx: Optional[SOMATileDBContext] = None ) -> Iterator[ad.AnnData]: """ This lets us ingest H5AD with "r" (backed mode) from S3 URIs. """ - input_handle = tiledb.VFS(ctx=ctx).open(input_path) + ctx = ctx or SOMATileDBContext() + vfs = clib.VFS(ctx.native_context) + input_handle = clib.VFSFilebuf(vfs).open(str(input_path)) try: with _hack_patch_anndata(): anndata = ad.read_h5ad(_FSPathWrapper(input_handle, input_path), mode) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 6ad366d9ee..a25e3688da 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -52,7 +52,6 @@ eta, logging, ) -from .. import pytiledbsoma as clib from .._arrow_types import df_to_arrow from .._collection import AnyTileDBCollection, CollectionBase from .._common_nd_array import NDArray @@ -101,7 +100,7 @@ signatures, ) from ._registration.signatures import OriginalIndexMetadata, _prepare_df_for_ingest -from ._util import get_arrow_str_format, read_h5ad +from ._util import read_h5ad _NDArr = TypeVar("_NDArr", bound=NDArray) _TDBO = TypeVar("_TDBO", bound=SOMAObject[RawHandle]) @@ -359,7 +358,7 @@ def from_h5ad( logging.log_io(None, f"START READING {input_path}") - with read_h5ad(input_path, mode="r", ctx=context.tiledb_ctx) as anndata: + with read_h5ad(input_path, mode="r", ctx=context) as anndata: logging.log_io(None, _util.format_elapsed(s, f"FINISH READING {input_path}")) uri = from_anndata( @@ -1491,6 +1490,21 @@ def _update_dataframe( new_data, default_index_name ) + old_keys = set(old_sig.keys()) + new_keys = set(new_sig.keys()) + common_keys = old_keys.intersection(new_keys) + + msgs = [] + for key in common_keys: + old_type = old_sig[key] + new_type = new_sig[key] + + if old_type != new_type: + msgs.append(f"{key} type {old_type} != {new_type}") + if msgs: + msg = ", ".join(msgs) + raise ValueError(f"unsupported type updates: {msg}") + with DataFrame.open( sdf.uri, mode="r", context=context, platform_config=platform_config ) as sdf_r: @@ -1522,45 +1536,14 @@ def _update_dataframe( f"{caller_name}: old data soma_joinid must be [0,{num_old_data}), found {len(jid_diffs)} diffs: {', '.join(jid_diff_strs)}" ) - old_keys = set(old_sig.keys()) - new_keys = set(new_sig.keys()) - drop_keys = old_keys.difference(new_keys) - add_keys = new_keys.difference(old_keys) - common_keys = old_keys.intersection(new_keys) - - msgs = [] - for key in common_keys: - old_type = old_sig[key] - new_type = new_sig[key] - - if old_type != new_type: - msgs.append(f"{key} type {old_type} != {new_type}") - if msgs: - msg = ", ".join(msgs) - raise ValueError(f"unsupported type updates: {msg}") - - arrow_table = df_to_arrow(new_data) - arrow_schema = arrow_table.schema.remove_metadata() - - add_attrs = dict() - add_enmrs = dict() - for add_key in add_keys: - # Don't directly use the new dataframe's dtypes. Go through the - # to-Arrow-schema logic, and back, as this recapitulates the original - # schema-creation logic. - atype = arrow_schema.field(add_key).type - if pa.types.is_dictionary(arrow_table.schema.field(add_key).type): - add_attrs[add_key] = get_arrow_str_format(atype.index_type) - add_enmrs[add_key] = ( - get_arrow_str_format(atype.value_type), - atype.ordered, - ) - else: - add_attrs[add_key] = get_arrow_str_format(atype) - - clib._update_dataframe( - sdf.uri, sdf.context.native_context, list(drop_keys), add_attrs, add_enmrs - ) + new_data.reset_index(inplace=True) + if default_index_name is not None: + if default_index_name in new_data: + if "index" in new_data: + new_data.drop(columns=["index"], inplace=True) + else: + new_data.rename(columns={"index": default_index_name}, inplace=True) + sdf_r._handle._handle.update(df_to_arrow(new_data).schema) _write_dataframe( df_uri=sdf.uri, diff --git a/apis/python/src/tiledbsoma/options/_soma_tiledb_context.py b/apis/python/src/tiledbsoma/options/_soma_tiledb_context.py index 8acb377601..d95a182725 100644 --- a/apis/python/src/tiledbsoma/options/_soma_tiledb_context.py +++ b/apis/python/src/tiledbsoma/options/_soma_tiledb_context.py @@ -9,32 +9,17 @@ import functools import threading import time -import warnings from concurrent.futures import ThreadPoolExecutor from typing import Any, Dict, Literal, Mapping, Optional, Union from somacore import ContextBase from typing_extensions import Self -import tiledb - from .. import pytiledbsoma as clib -from .._general_utilities import assert_version_before from .._types import OpenTimestamp from .._util import ms_to_datetime, to_timestamp_ms -def _warn_ctx_deprecation() -> None: - assert_version_before(1, 14) - warnings.warn( - "tiledb_ctx is now deprecated for removal in 1.14. " - "Use tiledb_config instead by passing " - "SOMATileDBContext(tiledb_config=ctx.config().dict()).", - DeprecationWarning, - stacklevel=3, - ) - - def _default_config( override: Mapping[str, Union[str, float]] ) -> Dict[str, Union[str, float]]: @@ -51,9 +36,9 @@ def _default_config( @functools.lru_cache(maxsize=None) -def _default_global_ctx() -> tiledb.Ctx: +def _default_global_native_context() -> clib.SOMAContext: """Lazily builds a default TileDB Context with the default config.""" - return tiledb.Ctx(_default_config({})) + return clib.SOMAContext({k: str(v) for k, v in _default_config({}).items()}) def _maybe_timestamp_ms(input: Optional[OpenTimestamp]) -> Optional[int]: @@ -81,7 +66,6 @@ class SOMATileDBContext(ContextBase): def __init__( self, - tiledb_ctx: Optional[tiledb.Ctx] = None, tiledb_config: Optional[Dict[str, Union[str, float]]] = None, timestamp: Optional[OpenTimestamp] = None, threadpool: Optional[ThreadPoolExecutor] = None, @@ -133,27 +117,12 @@ def __init__( provided, a new ThreadPoolExecutor will be created with default settings. """ - if tiledb_ctx is not None: - _warn_ctx_deprecation() - - if tiledb_ctx is not None and tiledb_config is not None: - raise ValueError( - "only one of tiledb_ctx or tiledb_config" - " may be set when constructing a SOMATileDBContext" - ) self._lock = threading.Lock() """A lock to ensure single initialization of ``_tiledb_ctx``.""" - self._initial_config = ( + self._initial_config: Optional[Dict[str, Union[str, float]]] = ( None if tiledb_config is None else _default_config(tiledb_config) ) - """A dictionary of options to override the default TileDB config. - - This includes both the user-provided options and the default options - that we provide to TileDB. If this is unset, then either we were - provided with a TileDB Ctx, or we need to use The Default Global Ctx. - """ - self._tiledb_ctx = tiledb_ctx """The TileDB context to use, either provided or lazily constructed.""" self._timestamp_ms = _maybe_timestamp_ms(timestamp) @@ -184,25 +153,14 @@ def native_context(self) -> clib.SOMAContext: """The C++ SOMAContext for this SOMA context.""" with self._lock: if self._native_context is None: - cfg = self._internal_tiledb_config() - self._native_context = clib.SOMAContext( - {k: str(v) for k, v in cfg.items()} - ) - return self._native_context - - @property - def tiledb_ctx(self) -> tiledb.Ctx: - """The TileDB-Py Context for this SOMA context.""" - _warn_ctx_deprecation() - - with self._lock: - if self._tiledb_ctx is None: if self._initial_config is None: - # Special case: we need to use the One Global Default. - self._tiledb_ctx = _default_global_ctx() + self._native_context = _default_global_native_context() else: - self._tiledb_ctx = tiledb.Ctx(self._initial_config) - return self._tiledb_ctx + cfg = self._internal_tiledb_config() + self._native_context = clib.SOMAContext( + {k: str(v) for k, v in cfg.items()} + ) + return self._native_context @property def tiledb_config(self) -> Dict[str, Union[str, float]]: @@ -228,11 +186,6 @@ def _internal_tiledb_config(self) -> Dict[str, Union[str, float]]: if self._native_context is not None: return dict(self._native_context.config()) - # We have TileDB Context. Return its actual config. - # TODO This block will be deleted once tiledb_ctx is removed in 1.14 - if self._tiledb_ctx is not None: - return dict(self._tiledb_ctx.config()) - # Our context has not yet been built. # We return what will be passed into the context. return ( @@ -245,7 +198,6 @@ def replace( self, *, tiledb_config: Optional[Dict[str, Any]] = None, - tiledb_ctx: Optional[tiledb.Ctx] = None, timestamp: Union[None, OpenTimestamp, _Unset] = _UNSET, threadpool: Union[None, ThreadPoolExecutor, _Unset] = _UNSET, ) -> Self: @@ -277,15 +229,7 @@ def replace( ... tiledb_config={"vfs.s3.region": None}) """ with self._lock: - if tiledb_ctx is not None: - _warn_ctx_deprecation() - if tiledb_config is not None: - if tiledb_ctx: - raise ValueError( - "Either tiledb_config or tiledb_ctx may be provided" - " to replace(), but not both." - ) new_config = self._internal_tiledb_config() new_config.update(tiledb_config) tiledb_config = {k: v for (k, v) in new_config.items() if v is not None} @@ -300,7 +244,6 @@ def replace( assert timestamp is None or isinstance(timestamp, (datetime.datetime, int)) return type(self)( tiledb_config=tiledb_config, - tiledb_ctx=tiledb_ctx, timestamp=timestamp, threadpool=threadpool, ) @@ -325,11 +268,6 @@ def _validate_soma_tiledb_context(context: Any) -> SOMATileDBContext: if context is None: return SOMATileDBContext() - if isinstance(context, tiledb.Ctx): - raise TypeError( - "context is a tiledb.Ctx, not a SOMATileDBContext -- please wrap it in tiledbsoma.SOMATileDBContext(...)" - ) - if not isinstance(context, SOMATileDBContext): raise TypeError("context is not a SOMATileDBContext") diff --git a/apis/python/src/tiledbsoma/options/_tiledb_create_write_options.py b/apis/python/src/tiledbsoma/options/_tiledb_create_write_options.py index 099fb170a2..5b2632c403 100644 --- a/apis/python/src/tiledbsoma/options/_tiledb_create_write_options.py +++ b/apis/python/src/tiledbsoma/options/_tiledb_create_write_options.py @@ -1,4 +1,3 @@ -import warnings from typing import ( Any, Dict, @@ -11,7 +10,6 @@ TypedDict, TypeVar, Union, - cast, ) import attrs as attrs_ # We use the name `attrs` later. @@ -19,10 +17,6 @@ from somacore import options from typing_extensions import Self -import tiledb - -from .._general_utilities import assert_version_before - # Most defaults are configured directly as default attribute values # within TileDBCreateOptions. DEFAULT_TILE_EXTENT = 2048 @@ -195,44 +189,6 @@ def cell_tile_orders(self) -> Tuple[Optional[str], Optional[str]]: return DEFAULT_CELL_ORDER, DEFAULT_TILE_ORDER return self.cell_order, self.tile_order - def offsets_filters_tiledb(self) -> Tuple[tiledb.Filter, ...]: - """Constructs the real TileDB Filters to use for offsets.""" - assert_version_before(1, 14) - warnings.warn( - "`offsets_filters_tiledb` is now deprecated for removal in 1.14 " - "as we no longer support returning tiledb.Filter. " - "Use `offsets_filters` instead.", - DeprecationWarning, - ) - - return tuple(_build_filter(f) for f in self.offsets_filters) - - def validity_filters_tiledb(self) -> Optional[Tuple[tiledb.Filter, ...]]: - """Constructs the real TileDB Filters to use for the validity map.""" - assert_version_before(1, 14) - warnings.warn( - "`validity_filters_tiledb` is now deprecated for removal in 1.14 " - "as we no longer support returning tiledb.Filter. " - "Use `validity_filters` instead.", - DeprecationWarning, - ) - if self.validity_filters is None: - return None - return tuple(_build_filter(f) for f in self.validity_filters) - - def dim_filters_tiledb( - self, dim: str, default: Sequence[_FilterSpec] = () - ) -> Tuple[tiledb.Filter, ...]: - """Constructs the real TileDB Filters to use for the named dimension.""" - assert_version_before(1, 14) - warnings.warn( - "`dim_filters_tiledb` is now deprecated for removal in 1.14 " - "as we no longer support returning tiledb.Filter. " - "Use `dims` instead.", - DeprecationWarning, - ) - return _filters_from(self.dims, dim, default) - def dim_tile(self, dim_name: str, default: int = DEFAULT_TILE_EXTENT) -> int: """Returns the tile extent for the given dimension.""" try: @@ -241,19 +197,6 @@ def dim_tile(self, dim_name: str, default: int = DEFAULT_TILE_EXTENT) -> int: return default return default if dim.tile is None else dim.tile - def attr_filters_tiledb( - self, name: str, default: Sequence[_FilterSpec] = () - ) -> Tuple[tiledb.Filter, ...]: - """Constructs the real TileDB Filters to use for the named attribute.""" - assert_version_before(1, 14) - warnings.warn( - "`attr_filters_tiledb` is now deprecated for removal in 1.14 " - "as we no longer support returning tiledb.Filter. " - "Use `attrs` instead.", - DeprecationWarning, - ) - return _filters_from(self.attrs, name, default) - @attrs_.define(frozen=True, kw_only=True, slots=True) class TileDBWriteOptions: @@ -332,8 +275,25 @@ def _dig_platform_config( # Filter handling and construction. # -_FILTERS: Mapping[str, Type[tiledb.Filter]] = { - cls.__name__: cls for cls in tiledb.FilterList.filter_type_cc_to_python.values() +_FILTERS: Mapping[str, str] = { + "GzipFilter": "GZIP", + "ZstdFilter": "ZSTD", + "LZ4Filter": "LZ4", + "Bzip2Filter": "BZIP2", + "RleFilter": "RLE", + "DeltaFilter": "DELTA", + "DoubleDeltaFilter": "DOUBLE_DELTA", + "BitWidthReductionFilter": "BIT_WIDTH_REDUCTION", + "BitShuffleFilter": "BITSHUFFLE", + "ByteShuffleFilter": "BYTESHUFFLE", + "PositiveDeltaFilter": "POSITIVE_DELTA", + "ChecksumMD5Filter": "CHECKSUM_MD5", + "ChecksumSHA256Filter": "CHECKSUM_SHA256", + "DictionaryFilter": "DICTIONARY", + "FloatScaleFilter": "SCALE_FLOAT", + "XORFilter": "XOR", + "WebpFilter": "WEBP", + "NoOpFilter": "NONE", } @@ -358,30 +318,3 @@ def _normalize_filter(input: _FilterSpec) -> _DictFilterSpec: except KeyError as ke: raise ValueError(f"filter type {typ_name!r} unknown") from ke return dict(input) - - -def _filters_from( - col_configs: Mapping[str, _ColumnConfig], name: str, default: Sequence[_FilterSpec] -) -> Tuple[tiledb.Filter, ...]: - """Constructs the filters for the named column in ``col_configs``.""" - try: - cfg = col_configs[name] - except KeyError: - maybe_filters = None - else: - maybe_filters = cfg.filters - if maybe_filters is None: - filters = _normalize_filters(default) or () - else: - filters = maybe_filters - return tuple(_build_filter(f) for f in filters) - - -def _build_filter(item: _DictFilterSpec) -> tiledb.Filter: - """Build a single filter.""" - # Always make a copy here so we don't mutate the global state. - # We have validated this earlier so we don't do extra checking here. - kwargs = dict(item) - cls_name = cast(str, kwargs.pop("_type")) - cls = _FILTERS[cls_name] - return cls(**kwargs) diff --git a/apis/python/src/tiledbsoma/pytiledbsoma.cc b/apis/python/src/tiledbsoma/pytiledbsoma.cc index 82670549ab..58841fb248 100644 --- a/apis/python/src/tiledbsoma/pytiledbsoma.cc +++ b/apis/python/src/tiledbsoma/pytiledbsoma.cc @@ -26,6 +26,7 @@ void load_soma_group(py::module&); void load_soma_collection(py::module&); void load_query_condition(py::module&); void load_reindexer(py::module&); +void load_vfs(py::module&); PYBIND11_MODULE(pytiledbsoma, m) { py::register_exception(m, "SOMAError"); @@ -201,6 +202,7 @@ PYBIND11_MODULE(pytiledbsoma, m) { load_soma_collection(m); load_query_condition(m); load_reindexer(m); + load_vfs(m); } }; // namespace libtiledbsomacpp diff --git a/apis/python/src/tiledbsoma/soma_array.cc b/apis/python/src/tiledbsoma/soma_array.cc index f89f7414f3..cf97c1cf00 100644 --- a/apis/python/src/tiledbsoma/soma_array.cc +++ b/apis/python/src/tiledbsoma/soma_array.cc @@ -82,6 +82,19 @@ void write_coords( } } +void update(SOMAArray& array, py::handle pyarrow_schema) { + ArrowSchema arrow_schema; + uintptr_t arrow_schema_ptr = (uintptr_t)(&arrow_schema); + pyarrow_schema.attr("_export_to_c")(arrow_schema_ptr); + + try { + array.update_columns(std::make_unique(arrow_schema)); + } catch (const std::exception& e) { + TPY_ERROR_LOC(e.what()); + } + arrow_schema.release(&arrow_schema); +} + void load_soma_array(py::module& m) { py::class_(m, "SOMAArray") .def( @@ -518,6 +531,8 @@ void load_soma_array(py::module& m) { .def("write_coords", write_coords) + .def("update", update) + .def("nnz", &SOMAArray::nnz, py::call_guard()) .def_property_readonly("shape", &SOMAArray::shape) diff --git a/apis/python/src/tiledbsoma/soma_collection.cc b/apis/python/src/tiledbsoma/soma_collection.cc index 27a53a152f..ffa0602e99 100644 --- a/apis/python/src/tiledbsoma/soma_collection.cc +++ b/apis/python/src/tiledbsoma/soma_collection.cc @@ -30,14 +30,6 @@ * This file defines the SOMACollection bindings. */ -#include -#include -#include -#include -#include - -#include - #include "common.h" namespace libtiledbsomacpp { diff --git a/apis/python/src/tiledbsoma/vfs.cc b/apis/python/src/tiledbsoma/vfs.cc new file mode 100644 index 0000000000..f2bc0a16ef --- /dev/null +++ b/apis/python/src/tiledbsoma/vfs.cc @@ -0,0 +1,59 @@ +/** + * @file vfs.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines the VFS bindings. + */ + +#include "common.h" + +namespace libtiledbsomacpp { + +namespace py = pybind11; +using namespace py::literals; +using namespace tiledbsoma; + +using VFSFilebuf = tiledb::impl::VFSFilebuf; + +void load_vfs(py::module& m) { + py::class_(m, "VFS").def( + py::init([](std::shared_ptr context) { + return tiledb::VFS(*context->tiledb_ctx()); + }), + "ctx"_a); + + py::class_(m, "VFSFilebuf") + .def(py::init()) + .def( + "open", + [](VFSFilebuf& buf, const std::string& uri) { + return buf.open(uri, std::ios::in); + }) + .def("close", &VFSFilebuf::close, "should_throw"_a = true); +} +} // namespace libtiledbsomacpp diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index 4d235092f2..30e7fed82b 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -16,7 +16,14 @@ from tiledbsoma import Experiment, _constants, _factory from tiledbsoma._soma_object import SOMAObject from tiledbsoma._util import verify_obs_and_var_eq -import tiledb + +try: + import tiledb + + hastiledb = True +except ModuleNotFoundError: + hastiledb = False + from ._util import TESTDATA @@ -277,6 +284,7 @@ def _get_fragment_count(array_uri): TESTDATA / "pbmc-small-x-csc.h5ad", ], ) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_resume_mode(resume_mode_h5ad_file): """ Makes sure resume-mode ingest after successful ingest of the same input data does not write @@ -335,6 +343,7 @@ def test_resume_mode(resume_mode_h5ad_file): @pytest.mark.parametrize("use_relative_uri", [False, True, None]) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_ingest_relative(conftest_pbmc3k_h5ad_path, use_relative_uri): tempdir = tempfile.TemporaryDirectory() output_path = tempdir.name @@ -737,6 +746,7 @@ def check(tdbo: SOMAObject): check(raw.X) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_null_obs(conftest_pbmc_small, tmp_path: Path): output_path = tmp_path.as_uri() seed = 42 @@ -782,6 +792,7 @@ def test_null_obs(conftest_pbmc_small, tmp_path: Path): assert obs.attr(k).isnullable +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path): adata = anndata.read_h5ad(h5ad_file_with_obsm_holes.as_posix()) original = adata.copy() diff --git a/apis/python/tests/test_context.py b/apis/python/tests/test_context.py index 26a1191e53..083778fadb 100644 --- a/apis/python/tests/test_context.py +++ b/apis/python/tests/test_context.py @@ -5,40 +5,33 @@ import pytest import tiledbsoma.options._soma_tiledb_context as stc -import tiledb +import tiledbsoma.pytiledbsoma as clib @pytest.fixture(autouse=True) def global_ctx_reset(): - stc._default_global_ctx.cache_clear() + stc._default_global_native_context.cache_clear() yield def test_lazy_init(): """Verifies we don't construct a Ctx until we have to.""" - with mock.patch.object(tiledb, "Ctx", wraps=tiledb.Ctx) as mock_ctx: + with mock.patch.object(clib, "SOMAContext", wraps=clib.SOMAContext) as mock_ctx: context = stc.SOMATileDBContext(tiledb_config={}) assert context.tiledb_config == { "sm.mem.reader.sparse_global_order.ratio_array_data": 0.3 } mock_ctx.assert_not_called() - assert context._tiledb_ctx is None + assert context._native_context is None # Invoke the @property twice to ensure we only build one Ctx. - with pytest.deprecated_call(): - assert context.tiledb_ctx is context.tiledb_ctx + assert context.native_context is not None + assert context.native_context is context.native_context mock_ctx.assert_called_once() -def test_tiledb_ctx_init(): - config = {"hither": "yon"} - with pytest.deprecated_call(): - context = stc.SOMATileDBContext(tiledb_ctx=tiledb.Ctx(config)) - assert "hither" in context.tiledb_config - - def test_lazy_replace_config(): """Verifies we don't construct a Ctx even if we call ``.replace``.""" - with mock.patch.object(tiledb, "Ctx", wraps=tiledb.Ctx) as mock_ctx: + with mock.patch.object(clib, "SOMAContext", wraps=clib.SOMAContext) as mock_ctx: context = stc.SOMATileDBContext() new_context = context.replace(tiledb_config={"hello": "goodbye"}) assert new_context.tiledb_config == { @@ -65,8 +58,7 @@ def test_shared_ctx(): """Verifies that one global context is shared by default.""" ctx = stc.SOMATileDBContext() ctx_2 = stc.SOMATileDBContext() - with pytest.deprecated_call(): - assert ctx.tiledb_ctx is ctx_2.tiledb_ctx + assert ctx.native_context is ctx_2.native_context def test_unshared_ctx(): @@ -74,9 +66,8 @@ def test_unshared_ctx(): ctx = stc.SOMATileDBContext() ctx_2 = stc.SOMATileDBContext(tiledb_config={}) ctx_3 = stc.SOMATileDBContext(tiledb_config={}) - with pytest.deprecated_call(): - assert ctx.tiledb_ctx is not ctx_2.tiledb_ctx - assert ctx_2.tiledb_ctx is not ctx_3.tiledb_ctx + assert ctx.native_context is not ctx_2.native_context + assert ctx_2.native_context is not ctx_3.native_context def test_replace_timestamp(): @@ -94,16 +85,6 @@ def test_replace_timestamp(): assert no_ts_ctx.timestamp is None -def test_replace_context(): - with pytest.deprecated_call(): - orig_ctx = stc.SOMATileDBContext(tiledb_ctx=tiledb.Ctx()) - new_tdb_ctx = tiledb.Ctx({"vfs.s3.region": "hy-central-1"}) - with pytest.deprecated_call(): - new_ctx = orig_ctx.replace(tiledb_ctx=new_tdb_ctx) - with pytest.deprecated_call(): - assert new_ctx.tiledb_ctx is new_tdb_ctx - - def test_replace_config_after_construction(): context = stc.SOMATileDBContext() @@ -122,12 +103,11 @@ def test_replace_config_after_construction(): assert context_ts_1._open_timestamp_ms(None) == 1 assert context_ts_1._open_timestamp_ms(2) == 2 - with mock.patch.object(tiledb, "Ctx", wraps=tiledb.Ctx) as mock_ctx: + with mock.patch.object(clib, "SOMAContext", wraps=clib.SOMAContext) as mock_ctx: # verify that the new context is lazily initialized. new_soma_ctx = context.replace(tiledb_config={"vfs.s3.region": "us-west-2"}) assert new_soma_ctx.tiledb_config["vfs.s3.region"] == "us-west-2" mock_ctx.assert_not_called() - with pytest.deprecated_call(): - new_tdb_ctx = new_soma_ctx.tiledb_ctx + new_tdb_ctx = new_soma_ctx.native_context mock_ctx.assert_called_once() assert new_tdb_ctx.config()["vfs.s3.region"] == "us-west-2" diff --git a/apis/python/tests/test_dataframe.py b/apis/python/tests/test_dataframe.py index b3201e9cc9..86dab817c4 100644 --- a/apis/python/tests/test_dataframe.py +++ b/apis/python/tests/test_dataframe.py @@ -11,7 +11,14 @@ from pandas.api.types import union_categoricals import tiledbsoma as soma -import tiledb + +try: + import tiledb + + hastiledb = True +except ModuleNotFoundError: + hastiledb = False + from tests._util import raises_no_typeguard @@ -129,11 +136,12 @@ def test_dataframe(tmp_path, arrow_schema): assert [e.as_py() for e in table["quux"]] == pydict["quux"] # Validate TileDB array schema - with tiledb.open(uri) as A: - assert A.schema.sparse - assert not A.schema.allows_duplicates - assert A.dim("foo").filters == [tiledb.ZstdFilter(level=3)] - assert A.attr("bar").filters == [tiledb.ZstdFilter()] + if hastiledb: + with tiledb.open(uri) as A: + assert A.schema.sparse + assert not A.schema.allows_duplicates + assert A.dim("foo").filters == [tiledb.ZstdFilter(level=3)] + assert A.attr("bar").filters == [tiledb.ZstdFilter()] with soma.DataFrame.open(uri) as sdf: assert sdf.count == 5 @@ -729,7 +737,7 @@ def make_multiply_indexed_dataframe(tmp_path, index_column_names: List[str]): "index_column_names": ["strings_aaa", "zero_one"], "coords": [[True], slice(None)], "A": None, - "throws": (RuntimeError, tiledb.cc.TileDBError, TypeError), + "throws": (RuntimeError, TypeError), }, { "name": "2D index empty", @@ -1036,27 +1044,34 @@ def test_result_order(tmp_path): next(sdf.read(result_order="bogus")) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") @pytest.mark.parametrize( "create_options,expected_schema_fields", ( ( {"allows_duplicates": True}, { - "validity_filters": tiledb.FilterList([tiledb.RleFilter()]), + "validity_filters": ( + tiledb.FilterList([tiledb.RleFilter()]) if hastiledb else None + ), "allows_duplicates": True, }, ), ( {"allows_duplicates": False}, { - "validity_filters": tiledb.FilterList([tiledb.RleFilter()]), + "validity_filters": ( + tiledb.FilterList([tiledb.RleFilter()]) if hastiledb else None + ), "allows_duplicates": False, }, ), ( {"validity_filters": ["NoOpFilter"], "allows_duplicates": False}, { - "validity_filters": tiledb.FilterList([tiledb.NoOpFilter()]), + "validity_filters": ( + tiledb.FilterList([tiledb.NoOpFilter()]) if hastiledb else None + ), "allows_duplicates": False, }, ), @@ -1078,6 +1093,7 @@ def test_create_platform_config_overrides( @pytest.mark.parametrize("allows_duplicates", [False, True]) @pytest.mark.parametrize("consolidate", [False, True]) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_timestamped_ops(tmp_path, allows_duplicates, consolidate): uri = tmp_path.as_posix() @@ -1484,23 +1500,24 @@ def test_enum_schema_report(tmp_path): sdf.write(arrow_table) # Double-check against TileDB-Py reporting - with tiledb.open(uri) as A: - for i in range(A.schema.nattr): - attr = A.schema.attr(i) - try: - index_type = attr.dtype - value_type = A.enum(attr.name).dtype - except tiledb.cc.TileDBError: - pass # not an enum attr - if attr.name == "int_cat": - assert index_type.name == "int8" - assert value_type.name == "int64" - elif attr.name == "str_cat": - assert index_type.name == "int8" - assert value_type.name == "str32" - elif attr.name == "byte_cat": - assert index_type.name == "int8" - assert value_type.name == "bytes8" + if hastiledb: + with tiledb.open(uri) as A: + for i in range(A.schema.nattr): + attr = A.schema.attr(i) + try: + index_type = attr.dtype + value_type = A.enum(attr.name).dtype + except tiledb.cc.TileDBError: + pass # not an enum attr + if attr.name == "int_cat": + assert index_type.name == "int8" + assert value_type.name == "int64" + elif attr.name == "str_cat": + assert index_type.name == "int8" + assert value_type.name == "str32" + elif attr.name == "byte_cat": + assert index_type.name == "int8" + assert value_type.name == "bytes8" # Verify SOMA Arrow schema with soma.open(uri) as sdf: diff --git a/apis/python/tests/test_dataframe_index_columns.py b/apis/python/tests/test_dataframe_index_columns.py index 7629840603..463739c1b6 100644 --- a/apis/python/tests/test_dataframe_index_columns.py +++ b/apis/python/tests/test_dataframe_index_columns.py @@ -3,7 +3,6 @@ import pytest import tiledbsoma as soma -import tiledb @pytest.fixture @@ -1899,6 +1898,6 @@ def test_types_read_errors( with soma.DataFrame.open(uri, "w") as sdf: sdf.write(arrow_table) - with pytest.raises((RuntimeError, tiledb.cc.TileDBError)): + with pytest.raises(RuntimeError): with soma.DataFrame.open(uri, "r") as sdf: sdf.read(coords=coords).concat() diff --git a/apis/python/tests/test_dense_nd_array.py b/apis/python/tests/test_dense_nd_array.py index 4a161b535d..9d262a2bc7 100644 --- a/apis/python/tests/test_dense_nd_array.py +++ b/apis/python/tests/test_dense_nd_array.py @@ -9,7 +9,14 @@ import tiledbsoma as soma from tiledbsoma.options import SOMATileDBContext -import tiledb + +try: + import tiledb + + hastiledb = True +except ModuleNotFoundError: + hastiledb = False + from . import NDARRAY_ARROW_TYPES_NOT_SUPPORTED, NDARRAY_ARROW_TYPES_SUPPORTED from ._util import raises_no_typeguard @@ -50,8 +57,9 @@ def test_dense_nd_array_create_ok( assert not a.schema.field("soma_data").nullable # Validate TileDB array schema - with tiledb.open(tmp_path.as_posix()) as A: - assert not A.schema.sparse + if hastiledb: + with tiledb.open(tmp_path.as_posix()) as A: + assert not A.schema.sparse # Ensure read mode uses clib object with soma.DenseNDArray.open(tmp_path.as_posix(), "r") as A: @@ -151,8 +159,9 @@ def test_dense_nd_array_read_write_tensor(tmp_path, shape: Tuple[int, ...]): assert np.array_equal(data, table.combine_chunks().to_numpy().reshape(shape)) # Validate TileDB array schema - with tiledb.open(tmp_path.as_posix()) as A: - assert not A.schema.sparse + if hastiledb: + with tiledb.open(tmp_path.as_posix()) as A: + assert not A.schema.sparse # write a single-value sub-array and recheck with soma.DenseNDArray.open(tmp_path.as_posix(), "w") as c: @@ -281,7 +290,7 @@ def test_dense_nd_array_slicing(tmp_path, io): cfg = {} if "cfg" in io: cfg = io["cfg"] - context = SOMATileDBContext(tiledb_ctx=tiledb.Ctx(cfg)) + context = SOMATileDBContext(cfg) nr = 4 nc = 6 @@ -311,13 +320,13 @@ def test_dense_nd_array_slicing(tmp_path, io): "name": "negative", "shape": (10,), "coords": (-1,), - "throws": (RuntimeError, tiledb.cc.TileDBError), + "throws": RuntimeError, }, { "name": "12 in 10 domain", "shape": (10,), "coords": (12,), - "throws": (RuntimeError, tiledb.cc.TileDBError), + "throws": RuntimeError, }, { "name": "too many dims", @@ -415,9 +424,10 @@ def test_tile_extents(tmp_path): }, ).close() - with tiledb.open(tmp_path.as_posix()) as A: - assert A.schema.domain.dim(0).tile == 100 - assert A.schema.domain.dim(1).tile == 2048 + if hastiledb: + with tiledb.open(tmp_path.as_posix()) as A: + assert A.schema.domain.dim(0).tile == 100 + assert A.schema.domain.dim(1).tile == 2048 def test_timestamped_ops(tmp_path): diff --git a/apis/python/tests/test_experiment_query.py b/apis/python/tests/test_experiment_query.py index 1897a273f3..12ede30a12 100644 --- a/apis/python/tests/test_experiment_query.py +++ b/apis/python/tests/test_experiment_query.py @@ -1,3 +1,4 @@ +import re from concurrent import futures from contextlib import nullcontext from typing import Tuple @@ -15,7 +16,6 @@ from tiledbsoma import SOMATileDBContext, _factory from tiledbsoma._collection import CollectionBase from tiledbsoma.experiment_query import X_as_series -import tiledb from tests._util import raises_no_typeguard @@ -950,11 +950,11 @@ def test_empty_categorical_query(conftest_pbmc_small_exp): measurement_name="RNA", obs_query=AxisQuery(value_filter='groups == "foo"') ) # Empty query on a categorical column raised ArrowInvalid before TileDB 2.21; see https://github.com/single-cell-data/TileDB-SOMA/pull/2299 - ctx = ( - nullcontext() - if tiledb.libtiledb.version() >= (2, 21) - else pytest.raises(ArrowInvalid) - ) + m = re.fullmatch(r"libtiledb=(\d+\.\d+\.\d+)", soma.pytiledbsoma.version()) + version = m.group(1).split(".") + major, minor = int(version[0]), int(version[1]) + + ctx = nullcontext() if (major, minor) >= (2, 21) else pytest.raises(ArrowInvalid) with ctx: obs = q.obs().concat() assert len(obs) == 0 diff --git a/apis/python/tests/test_factory.py b/apis/python/tests/test_factory.py index 7be3c917b5..71888a04fb 100644 --- a/apis/python/tests/test_factory.py +++ b/apis/python/tests/test_factory.py @@ -6,10 +6,16 @@ import tiledbsoma as soma from tiledbsoma import _constants -import tiledb UNKNOWN_ENCODING_VERSION = "3141596" +try: + import tiledb + + hastiledb = True +except ModuleNotFoundError: + hastiledb = False + @pytest.fixture def tiledb_object_uri(tmp_path, object_type, metadata_typename, encoding_version): @@ -76,6 +82,7 @@ def tiledb_object_uri(tmp_path, object_type, metadata_typename, encoding_version ), ], ) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_open(tiledb_object_uri, expected_soma_type: Type): """Happy path tests""" # TODO: Fix Windows test failures without the following. @@ -122,6 +129,7 @@ def test_open(tiledb_object_uri, expected_soma_type: Type): ), ], ) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_open_wrong_type(tiledb_object_uri, wrong_type): with pytest.raises((soma.SOMAError, TypeError)): soma.open(tiledb_object_uri, soma_type=wrong_type) @@ -138,6 +146,7 @@ def test_open_wrong_type(tiledb_object_uri, wrong_type): ("array", "SOMASparseNDArray", UNKNOWN_ENCODING_VERSION), ], ) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_factory_unsupported_version(tiledb_object_uri): """All of these should raise, as they are encoding formats from the future""" # TODO: Fix Windows test failures without the following. @@ -169,6 +178,7 @@ def test_factory_unsupported_version(tiledb_object_uri): ), # DataFrame can't be a group ], ) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_factory_unsupported_types(tiledb_object_uri): """Illegal or non-existant metadata""" with pytest.raises(soma.SOMAError): diff --git a/apis/python/tests/test_platform_config.py b/apis/python/tests/test_platform_config.py index e2bf06c456..245192fdf5 100644 --- a/apis/python/tests/test_platform_config.py +++ b/apis/python/tests/test_platform_config.py @@ -7,7 +7,8 @@ import tiledbsoma.io import tiledbsoma.options._tiledb_create_write_options as tco from tiledbsoma._util import verify_obs_and_var_eq -import tiledb + +tiledb = pytest.importorskip("tiledb") def test_platform_config(conftest_pbmc_small): diff --git a/apis/python/tests/test_sparse_nd_array.py b/apis/python/tests/test_sparse_nd_array.py index 1ccc597020..71e697781d 100644 --- a/apis/python/tests/test_sparse_nd_array.py +++ b/apis/python/tests/test_sparse_nd_array.py @@ -18,7 +18,13 @@ import tiledbsoma as soma from tiledbsoma import _factory from tiledbsoma.options import SOMATileDBContext -import tiledb + +try: + import tiledb + + hastiledb = True +except ModuleNotFoundError: + hastiledb = False from . import NDARRAY_ARROW_TYPES_NOT_SUPPORTED, NDARRAY_ARROW_TYPES_SUPPORTED from ._util import raises_no_typeguard @@ -318,9 +324,10 @@ def test_sparse_nd_array_read_write_sparse_tensor( assert t.shape == shape # Validate TileDB array schema - with tiledb.open(tmp_path.as_posix()) as A: - assert A.schema.sparse - assert not A.schema.allows_duplicates + if hastiledb: + with tiledb.open(tmp_path.as_posix()) as A: + assert A.schema.sparse + assert not A.schema.allows_duplicates @pytest.mark.parametrize("shape", [(10,), (23, 4), (5, 3, 1), (8, 4, 2, 30)]) @@ -343,9 +350,10 @@ def test_sparse_nd_array_read_write_table( assert tables_are_same_value(data, t) # Validate TileDB array schema - with tiledb.open(tmp_path.as_posix()) as A: - assert A.schema.sparse - assert not A.schema.allows_duplicates + if hastiledb: + with tiledb.open(tmp_path.as_posix()) as A: + assert A.schema.sparse + assert not A.schema.allows_duplicates @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64]) @@ -372,9 +380,10 @@ def test_sparse_nd_array_read_as_pandas( ) # Validate TileDB array schema - with tiledb.open(tmp_path.as_posix()) as A: - assert A.schema.sparse - assert not A.schema.allows_duplicates + if hastiledb: + with tiledb.open(tmp_path.as_posix()) as A: + assert A.schema.sparse + assert not A.schema.allows_duplicates @pytest.mark.parametrize("shape_is_nones", [True, False]) @@ -616,10 +625,7 @@ def test_csr_csc_2d_read(tmp_path, shape): "dims": { "soma_dim_0": [2, 4], }, - "throws": ( - RuntimeError, - tiledb.cc.TileDBError, - ), + "throws": (RuntimeError), }, { "name": "coords=[0,0]", @@ -1081,9 +1087,10 @@ def test_tile_extents(tmp_path): }, ).close() - with tiledb.open(tmp_path.as_posix()) as A: - assert A.schema.domain.dim(0).tile == 100 - assert A.schema.domain.dim(1).tile == 2048 + if hastiledb: + with tiledb.open(tmp_path.as_posix()) as A: + assert A.schema.domain.dim(0).tile == 100 + assert A.schema.domain.dim(1).tile == 2048 @pytest.mark.parametrize( @@ -1092,26 +1099,33 @@ def test_tile_extents(tmp_path): ( {"allows_duplicates": True}, { - "validity_filters": tiledb.FilterList([tiledb.RleFilter()]), + "validity_filters": ( + tiledb.FilterList([tiledb.RleFilter()]) if hastiledb else None + ), "allows_duplicates": True, }, ), ( {"allows_duplicates": False}, { - "validity_filters": tiledb.FilterList([tiledb.RleFilter()]), + "validity_filters": ( + tiledb.FilterList([tiledb.RleFilter()]) if hastiledb else None + ), "allows_duplicates": False, }, ), ( {"validity_filters": ["NoOpFilter"], "allows_duplicates": False}, { - "validity_filters": tiledb.FilterList([tiledb.NoOpFilter()]), + "validity_filters": ( + tiledb.FilterList([tiledb.NoOpFilter()]) if hastiledb else None + ), "allows_duplicates": False, }, ), ), ) +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_create_platform_config_overrides( tmp_path, create_options, expected_schema_fields ): diff --git a/apis/python/tests/test_util_tiledb.py b/apis/python/tests/test_util_tiledb.py index 4dbada2d09..76bc176f0d 100644 --- a/apis/python/tests/test_util_tiledb.py +++ b/apis/python/tests/test_util_tiledb.py @@ -2,9 +2,16 @@ import pytest import tiledbsoma as soma -import tiledb +try: + import tiledb + hastiledb = True +except ModuleNotFoundError: + hastiledb = False + + +@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed") def test_stats(tmp_path, capsys: pytest.CaptureFixture[str]): """Make sure these exist, don't throw, and write correctly.""" tiledb.stats_enable() diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc index 3e94cf6c4e..1596a04918 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.cc +++ b/libtiledbsoma/src/utils/arrow_adapter.cc @@ -521,7 +521,7 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema( for (int64_t i = 0; i < index_column_schema->n_children; ++i) { auto col_name = index_column_schema->children[i]->name; if (strcmp(child->name, col_name) == 0) { - if (ArrowAdapter::_isvar(child->format)) { + if (ArrowAdapter::is_var_arrow_format(child->format)) { type = TILEDB_STRING_ASCII; } @@ -549,7 +549,7 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema( attr.set_nullable(true); } - if (ArrowAdapter::_isvar(child->format)) { + if (ArrowAdapter::is_var_arrow_format(child->format)) { attr.set_cell_val_num(TILEDB_VAR_NUM); } @@ -560,7 +560,9 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema( *ctx, child->name, enmr_type, - ArrowAdapter::_isvar(enmr_format) ? TILEDB_VAR_NUM : 1, + ArrowAdapter::is_var_arrow_format(enmr_format) ? + TILEDB_VAR_NUM : + 1, child->flags & ARROW_FLAG_DICTIONARY_ORDERED); ArraySchemaExperimental::add_enumeration(*ctx, schema, enmr); AttributeExperimental::set_enumeration_name( @@ -846,7 +848,7 @@ ArrowAdapter::to_arrow(std::shared_ptr column) { return std::pair(std::move(array), std::move(schema)); } -bool ArrowAdapter::_isvar(const char* format) { +bool ArrowAdapter::is_var_arrow_format(const char* format) { if ((strcmp(format, "U") == 0) || (strcmp(format, "Z") == 0) || (strcmp(format, "u") == 0) || (strcmp(format, "z") == 0)) { return true; diff --git a/libtiledbsoma/src/utils/arrow_adapter.h b/libtiledbsoma/src/utils/arrow_adapter.h index 34c130439e..5672a30fc6 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.h +++ b/libtiledbsoma/src/utils/arrow_adapter.h @@ -210,6 +210,8 @@ class ArrowAdapter { static std::string_view to_arrow_format( tiledb_datatype_t tiledb_dtype, bool use_large = true); + static bool is_var_arrow_format(const char* format); + /** * @brief Get TileDB datatype from Arrow format string. * @@ -244,8 +246,6 @@ class ArrowAdapter { return Dimension::create(*ctx, name, {b[0], b[1]}, b[2]); } - static bool _isvar(const char* format); - static FilterList _create_filter_list( std::string filters, std::shared_ptr ctx);