From 2d670965f2c951d3249c2ca04f76dcbd84deadb1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 18 Oct 2024 14:00:15 -0400 Subject: [PATCH 01/24] sdf temp --- apis/python/src/tiledbsoma/_dataframe.py | 39 ++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 01e14f1e3d..5a1b9f4452 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -136,6 +136,45 @@ class DataFrame(SOMAArray, somacore.DataFrame): """ _wrapper_type = DataFrameWrapper + _clib_handle: clib.SOMADataFrame + + @classmethod + def open( + cls, + uri: str, + mode: options.OpenMode = "r", + *, + tiledb_timestamp: Optional[OpenTimestamp] = None, + context: Optional[SOMATileDBContext] = None, + platform_config: Optional[options.PlatformConfig] = None, + clib_type: Optional[str] = None, + ) -> Self: + """Opens this specific type of SOMA object.""" + + retval = super().open( + uri, + mode, + tiledb_timestamp=tiledb_timestamp, + context=context, + platform_config=platform_config, + clib_type="SOMAArray", + ) + + # XXX libify + open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write + context = _validate_soma_tiledb_context(context) + timestamp_ms = context._open_timestamp_ms(tiledb_timestamp) + + retval._clib_handle = clib.SOMADataFrame.open( + uri, + open_mode, + context.native_context, + column_names=[], # XXX + result_order=clib.ResultOrder.automatic, # XXX + timestamp=(0, timestamp_ms), + ) + + return retval @classmethod def create( From 88667770e6a6a8c0f0fece42e0c0b5a54d2aeee9 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 18 Oct 2024 14:02:57 -0400 Subject: [PATCH 02/24] sdf temp --- apis/python/src/tiledbsoma/_dataframe.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 5a1b9f4452..f1b9e7b6b1 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -426,8 +426,8 @@ def count(self) -> int: Maturing. """ self._check_open_read() - # if is it in read open mode, then it is a DataFrameWrapper - return cast(DataFrameWrapper, self._handle).count + # XXX WHY + return cast(int, self._clib_handle.count) @property def _maybe_soma_joinid_shape(self) -> Optional[int]: @@ -439,7 +439,7 @@ def _maybe_soma_joinid_shape(self) -> Optional[int]: Lifecycle: Experimental. """ - return self._handle.maybe_soma_joinid_shape + return cast(Optional[int], self._clib_handle.maybe_soma_joinid_shape) @property def _maybe_soma_joinid_maxshape(self) -> Optional[int]: @@ -450,7 +450,7 @@ def _maybe_soma_joinid_maxshape(self) -> Optional[int]: Lifecycle: Experimental. """ - return self._handle.maybe_soma_joinid_maxshape + return cast(Optional[int], self._clib_handle.maybe_soma_joinid_maxshape) @property def tiledbsoma_has_upgraded_domain(self) -> bool: @@ -461,7 +461,7 @@ def tiledbsoma_has_upgraded_domain(self) -> bool: Lifecycle: Maturing. """ - return self._handle.tiledbsoma_has_upgraded_domain + return cast(bool, self._clib_handle.tiledbsoma_has_upgraded_domain) def resize_soma_joinid_shape( self, newshape: int, check_only: bool = False @@ -479,10 +479,10 @@ def resize_soma_joinid_shape( if check_only: return cast( StatusAndReason, - self._handle._handle.can_resize_soma_joinid_shape(newshape), + self._clib_handle.can_resize_soma_joinid_shape(newshape), ) else: - self._handle._handle.resize_soma_joinid_shape(newshape) + self._clib_handle.resize_soma_joinid_shape(newshape) return (True, "") def upgrade_soma_joinid_shape( @@ -498,10 +498,10 @@ def upgrade_soma_joinid_shape( if check_only: return cast( StatusAndReason, - self._handle._handle.can_upgrade_soma_joinid_shape(newshape), + self._clib_handle.can_upgrade_soma_joinid_shape(newshape), ) else: - self._handle._handle.upgrade_soma_joinid_shape(newshape) + self._clib_handle.upgrade_soma_joinid_shape(newshape) return (True, "") def __len__(self) -> int: From 7d06679e09c1d21d706335c784c37b7435b49204 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 18 Oct 2024 14:23:33 -0400 Subject: [PATCH 03/24] temp wip --- apis/python/src/tiledbsoma/_dataframe.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index f1b9e7b6b1..2ce85bb0e9 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -369,11 +369,22 @@ def create( raise map_exception_for_create(e, uri) from None handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp) - return cls( + retval = cls( handle, _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", ) + retval._clib_handle = clib.SOMADataFrame.open( + uri, + clib.OpenMode.write, + context.native_context, + column_names=[], # XXX + result_order=clib.ResultOrder.automatic, # XXX + timestamp=(0, timestamp_ms), + ) + + return retval + def keys(self) -> Tuple[str, ...]: """Returns the names of the columns when read back as a dataframe. From 19c7f2b82462b7442c5f81752c86d63895be25fc Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 18 Oct 2024 14:49:20 -0400 Subject: [PATCH 04/24] handle temp --- apis/python/src/tiledbsoma/_dataframe.py | 16 ++++++---------- apis/python/src/tiledbsoma/_soma_array.py | 3 ++- apis/python/src/tiledbsoma/_soma_object.py | 3 ++- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 2ce85bb0e9..c95a14e474 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -586,25 +586,23 @@ def read( _util.check_unpartitioned(partitions) self._check_open_read() - handle = self._handle._handle - - context = handle.context() + context = self._clib_handle.context() if platform_config is not None: config = context.tiledb_config.copy() config.update(platform_config) context = clib.SOMAContext(config) sr = clib.SOMADataFrame.open( - uri=handle.uri, + uri=self._clib_handle.uri, mode=clib.OpenMode.read, context=context, column_names=column_names or [], result_order=_util.to_clib_result_order(result_order), - timestamp=handle.timestamp and (0, handle.timestamp), + timestamp=self._clib_handle.timestamp and (0, self._clib_handle.timestamp), ) if value_filter is not None: - sr.set_condition(QueryCondition(value_filter), handle.schema) + sr.set_condition(QueryCondition(value_filter), self._clib_handle.schema) self._set_reader_coords(sr, coords) @@ -658,13 +656,11 @@ def write( write_options = TileDBWriteOptions.from_platform_config(platform_config) sort_coords = write_options.sort_coords - clib_dataframe = self._handle._handle - for batch in values.to_batches(): - clib_dataframe.write(batch, sort_coords or False) + self._clib_handle.write(batch, sort_coords or False) if write_options.consolidate_and_vacuum: - clib_dataframe.consolidate_and_vacuum() + self._clib_handle.consolidate_and_vacuum() return self diff --git a/apis/python/src/tiledbsoma/_soma_array.py b/apis/python/src/tiledbsoma/_soma_array.py index 5e022418f8..ea5294684f 100644 --- a/apis/python/src/tiledbsoma/_soma_array.py +++ b/apis/python/src/tiledbsoma/_soma_array.py @@ -39,7 +39,7 @@ def open( clib_type: Optional[str] = None, ) -> Self: """Opens this specific type of SOMA object.""" - return super().open( + retval = super().open( uri, mode, tiledb_timestamp=tiledb_timestamp, @@ -47,6 +47,7 @@ def open( platform_config=platform_config, clib_type="SOMAArray", ) + return retval @property def schema(self) -> pa.Schema: diff --git a/apis/python/src/tiledbsoma/_soma_object.py b/apis/python/src/tiledbsoma/_soma_object.py index 71fae027ee..ec153ed6c0 100644 --- a/apis/python/src/tiledbsoma/_soma_object.py +++ b/apis/python/src/tiledbsoma/_soma_object.py @@ -51,7 +51,7 @@ class SOMAObject(somacore.SOMAObject, Generic[_WrapperType_co]): ] """Class variable of the Wrapper class used to open this object type.""" - __slots__ = ("_close_stack", "_handle") + __slots__ = ("_close_stack", "_handle", "_clib_handle") @classmethod def open( @@ -142,6 +142,7 @@ def __init__( f" internal use only." ) self._handle = handle + self._clib_handle = handle._handle self._close_stack.enter_context(self._handle) def reopen( From 9cac64a2d460277c30f23db286f06002c5f13b70 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 20:43:12 -0400 Subject: [PATCH 05/24] handle move --- apis/python/src/tiledbsoma/_dataframe.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index c95a14e474..e82c7b128d 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -136,7 +136,6 @@ class DataFrame(SOMAArray, somacore.DataFrame): """ _wrapper_type = DataFrameWrapper - _clib_handle: clib.SOMADataFrame @classmethod def open( @@ -165,15 +164,6 @@ def open( context = _validate_soma_tiledb_context(context) timestamp_ms = context._open_timestamp_ms(tiledb_timestamp) - retval._clib_handle = clib.SOMADataFrame.open( - uri, - open_mode, - context.native_context, - column_names=[], # XXX - result_order=clib.ResultOrder.automatic, # XXX - timestamp=(0, timestamp_ms), - ) - return retval @classmethod @@ -374,15 +364,6 @@ def create( _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", ) - retval._clib_handle = clib.SOMADataFrame.open( - uri, - clib.OpenMode.write, - context.native_context, - column_names=[], # XXX - result_order=clib.ResultOrder.automatic, # XXX - timestamp=(0, timestamp_ms), - ) - return retval def keys(self) -> Tuple[str, ...]: From d6ea6a72299fd71def6ff658526014153dd8259d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 20:52:41 -0400 Subject: [PATCH 06/24] snda --- apis/python/src/tiledbsoma/_dataframe.py | 7 +--- .../python/src/tiledbsoma/_sparse_nd_array.py | 37 ++++++++++--------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index e82c7b128d..61bf1d6b38 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -159,11 +159,6 @@ def open( clib_type="SOMAArray", ) - # XXX libify - open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write - context = _validate_soma_tiledb_context(context) - timestamp_ms = context._open_timestamp_ms(tiledb_timestamp) - return retval @classmethod @@ -694,7 +689,7 @@ def _set_reader_coord( if coord.stop is None: # There's no way to specify "to infinity" for strings. # We have to get the nonempty domain and use that as the end. - ned = self._handle.non_empty_domain() + ned = self._clib_handle.non_empty_domain() _, stop = ned[dim_idx] else: stop = coord.stop diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index b9049a6b57..438ee866bb 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -228,7 +228,7 @@ def nnz(self) -> int: Maturing. """ self._check_open_read() - return cast(SparseNDArrayWrapper, self._handle).nnz + return cast(int, self._clib_handle.nnz()) def read( self, @@ -274,24 +274,22 @@ def read( * Negative indexing is unsupported. """ del batch_size # Currently unused. - handle: clib.SOMASparseNDArray = self._handle._handle - self._check_open_read() _util.check_unpartitioned(partitions) - context = handle.context() + context = self._clib_handle.context() if platform_config is not None: config = context.tiledb_config.copy() config.update(platform_config) context = clib.SOMAContext(config) sr = clib.SOMASparseNDArray.open( - uri=handle.uri, + uri=self._clib_handle.uri, mode=clib.OpenMode.read, context=context, column_names=[], result_order=_util.to_clib_result_order(result_order), - timestamp=handle.timestamp and (0, handle.timestamp), + timestamp=self._clib_handle.timestamp and (0, self._clib_handle.timestamp), ) return SparseNDArrayRead(sr, self, coords) @@ -308,9 +306,11 @@ def resize( would not. """ if check_only: - return self._handle.tiledbsoma_can_resize(newshape) + return cast( + StatusAndReason, self._clib_handle.tiledbsoma_can_resize(newshape) + ) else: - self._handle.resize(newshape) + self._clib_handle.resize(newshape) return (True, "") def tiledbsoma_upgrade_shape( @@ -321,9 +321,12 @@ def tiledbsoma_upgrade_shape( any dimension. Raises an error if the array already has a shape. """ if check_only: - return self._handle.tiledbsoma_can_upgrade_shape(newshape) + return cast( + StatusAndReason, + self._clib_handle.tiledbsoma_can_upgrade_shape(newshape), + ) else: - self._handle.tiledbsoma_upgrade_shape(newshape) + self._clib_handle.tiledbsoma_upgrade_shape(newshape) return (True, "") def write( @@ -370,12 +373,10 @@ def write( write_options = TileDBWriteOptions.from_platform_config(platform_config) sort_coords = write_options.sort_coords - clib_sparse_array = self._handle._handle - if isinstance(values, pa.SparseCOOTensor): # Write bulk data data, coords = values.to_numpy() - clib_sparse_array.write_coords( + self._clib_handle.write_coords( [ np.array( c, @@ -396,7 +397,7 @@ def write( if write_options.consolidate_and_vacuum: # Consolidate non-bulk data - clib_sparse_array.consolidate_and_vacuum() + self._clib_handle.consolidate_and_vacuum() return self if isinstance(values, (pa.SparseCSCMatrix, pa.SparseCSRMatrix)): @@ -407,7 +408,7 @@ def write( # Write bulk data # TODO: the ``to_scipy`` function is not zero copy. Need to explore zero-copy options. sp = values.to_scipy().tocoo() - clib_sparse_array.write_coords( + self._clib_handle.write_coords( [ np.array( c, @@ -428,14 +429,14 @@ def write( if write_options.consolidate_and_vacuum: # Consolidate non-bulk data - clib_sparse_array.consolidate_and_vacuum() + self._clib_handle.consolidate_and_vacuum() return self if isinstance(values, pa.Table): # Write bulk data values = _util.cast_values_to_target_schema(values, self.schema) for batch in values.to_batches(): - clib_sparse_array.write(batch, sort_coords or False) + self._clib_handle.write(batch, sort_coords or False) # Write bounding-box metadata maxes = [] @@ -451,7 +452,7 @@ def write( if write_options.consolidate_and_vacuum: # Consolidate non-bulk data - clib_sparse_array.consolidate_and_vacuum() + self._clib_handle.consolidate_and_vacuum() return self raise TypeError( From 6fa2e8a953b911da7314554f98fefbc238825677 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 20:53:09 -0400 Subject: [PATCH 07/24] dnda --- apis/python/src/tiledbsoma/_dense_nd_array.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dense_nd_array.py b/apis/python/src/tiledbsoma/_dense_nd_array.py index c2cb90ce3f..a7ac342fc2 100644 --- a/apis/python/src/tiledbsoma/_dense_nd_array.py +++ b/apis/python/src/tiledbsoma/_dense_nd_array.py @@ -216,12 +216,10 @@ def read( # # The only exception is if the array has been created but no data have been written at # all, in which case the best we can do is use the schema shape. - handle: clib.SOMADenseNDArray = self._handle._handle - ned = [] - for dim_name in handle.dimension_names: + for dim_name in self._clib_handle.dimension_names: dtype = np.dtype(self.schema.field(dim_name).type.to_pandas_dtype()) - slot = handle.non_empty_domain_slot_opt(dim_name, dtype) + slot = self._clib_handle.non_empty_domain_slot_opt(dim_name, dtype) if slot is None: use_shape = True break @@ -229,22 +227,22 @@ def read( else: use_shape = False - data_shape = tuple(handle.shape if use_shape else ned) + data_shape = tuple(self._clib_handle.shape if use_shape else ned) target_shape = dense_indices_to_shape(coords, data_shape, result_order) - context = handle.context() + context = self._clib_handle.context() if platform_config is not None: config = context.tiledb_config.copy() config.update(platform_config) context = clib.SOMAContext(config) sr = clib.SOMADenseNDArray.open( - uri=handle.uri, + uri=self._clib_handle.uri, mode=clib.OpenMode.read, context=context, column_names=[], result_order=_util.to_clib_result_order(result_order), - timestamp=handle.timestamp and (0, handle.timestamp), + timestamp=self._clib_handle.timestamp and (0, self._clib_handle.timestamp), ) self._set_reader_coords(sr, coords) @@ -304,8 +302,6 @@ def write( """ _util.check_type("values", values, (pa.Tensor,)) - clib_dense_array = self._handle._handle - # Compute the coordinates for the dense array. new_coords: List[Union[int, Slice[int], None]] = [] for c in coords: @@ -325,13 +321,13 @@ def write( if not input.flags.contiguous: input = np.ascontiguousarray(input) order = clib.ResultOrder.rowmajor - clib_dense_array.reset(result_order=order) - self._set_reader_coords(clib_dense_array, new_coords) - clib_dense_array.write(input) + self._clib_handle.reset(result_order=order) + self._set_reader_coords(self._clib_handle, new_coords) + self._clib_handle.write(input) tiledb_write_options = TileDBWriteOptions.from_platform_config(platform_config) if tiledb_write_options.consolidate_and_vacuum: - clib_dense_array.consolidate_and_vacuum() + self._clib_handle.consolidate_and_vacuum() return self def resize(self, newshape: Sequence[Union[int, None]]) -> None: @@ -339,7 +335,7 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: ``DenseNDArray`` in TileDB-SOMA 1.15 """ if clib.embedded_version_triple() >= (2, 27, 0): - self._handle.resize(newshape) + self._clib_handle.resize(newshape) else: raise NotImplementedError("Not implemented for libtiledbsoma < 2.27.0") From 015b000bf174b074325a73096fc996d5138ad03b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 21:04:09 -0400 Subject: [PATCH 08/24] sdf one more --- apis/python/src/tiledbsoma/_dataframe.py | 4 +++- apis/python/src/tiledbsoma/_sparse_nd_array.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 61bf1d6b38..e188ef8d62 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -693,7 +693,9 @@ def _set_reader_coord( _, stop = ned[dim_idx] else: stop = coord.stop - sr.set_dim_ranges_string_or_bytes(dim.name, [(start, stop)]) + # Use str(...) in case this is an Arrow string type, to satisfy + # the type-checker + sr.set_dim_ranges_string_or_bytes(dim.name, [(str(start), str(stop))]) return True # Note: slice(None, None) matches the is_slice_of part, unless we also check the dim-type diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index 438ee866bb..e793de16f8 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -307,7 +307,7 @@ def resize( """ if check_only: return cast( - StatusAndReason, self._clib_handle.tiledbsoma_can_resize(newshape) + StatusAndReason, self._clib_handle.can_resize(newshape) ) else: self._clib_handle.resize(newshape) From 665d9135e463db89ab93ebd2f68f1dc4f95498ff Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 21:09:35 -0400 Subject: [PATCH 09/24] more dataframes --- .../src/tiledbsoma/_point_cloud_dataframe.py | 22 ++++++++----------- .../python/src/tiledbsoma/_sparse_nd_array.py | 4 +--- .../src/tiledbsoma/_spatial_dataframe.py | 2 +- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/apis/python/src/tiledbsoma/_point_cloud_dataframe.py b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py index 59795e55f4..61af9917b2 100644 --- a/apis/python/src/tiledbsoma/_point_cloud_dataframe.py +++ b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py @@ -7,7 +7,7 @@ """ import warnings -from typing import Any, Optional, Sequence, Tuple, Union, cast +from typing import Any, Optional, Sequence, Tuple, Union import pyarrow as pa import somacore @@ -284,7 +284,7 @@ def count(self) -> int: """Returns the number of rows in the dataframe.""" self._check_open_read() # if is it in read open mode, then it is a PointCloudDataFrameWrapper - return cast(PointCloudDataFrameWrapper, self._handle).count + return self._clib_handle.count def read( self, @@ -322,25 +322,23 @@ def read( _util.check_unpartitioned(partitions) self._check_open_read() - handle = self._handle._handle - - context = handle.context() + context = self._clib_handle.context() if platform_config is not None: config = context.tiledb_config.copy() config.update(platform_config) context = clib.SOMAContext(config) sr = clib.SOMAPointCloudDataFrame.open( - uri=handle.uri, + uri=self._clib_handle.uri, mode=clib.OpenMode.read, context=context, column_names=column_names or [], result_order=_util.to_clib_result_order(result_order), - timestamp=handle.timestamp and (0, handle.timestamp), + timestamp=self._clib_handle.timestamp and (0, self._clib_handle.timestamp), ) if value_filter is not None: - sr.set_condition(QueryCondition(value_filter), handle.schema) + sr.set_condition(QueryCondition(value_filter), self._clib_handle.schema) self._set_reader_coords(sr, coords) @@ -428,7 +426,7 @@ def read_spatial_region( dict(), # Move index value_filters into this dict to optimize queries self._tiledb_dim_names(), self._coord_space.axis_names, - self._handle.schema, + self._clib_handle.schema, ) return somacore.SpatialRead( @@ -479,13 +477,11 @@ def write( write_options = TileDBWriteOptions.from_platform_config(platform_config) sort_coords = write_options.sort_coords - clib_dataframe = self._handle._handle - for batch in values.to_batches(): - clib_dataframe.write(batch, sort_coords or False) + self._clib_handle.write(batch, sort_coords or False) if write_options.consolidate_and_vacuum: - clib_dataframe.consolidate_and_vacuum() + self._clib_handle.consolidate_and_vacuum() return self diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index e793de16f8..13878818d5 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -306,9 +306,7 @@ def resize( would not. """ if check_only: - return cast( - StatusAndReason, self._clib_handle.can_resize(newshape) - ) + return cast(StatusAndReason, self._clib_handle.can_resize(newshape)) else: self._clib_handle.resize(newshape) return (True, "") diff --git a/apis/python/src/tiledbsoma/_spatial_dataframe.py b/apis/python/src/tiledbsoma/_spatial_dataframe.py index 571b5d116c..0e8f8136ec 100644 --- a/apis/python/src/tiledbsoma/_spatial_dataframe.py +++ b/apis/python/src/tiledbsoma/_spatial_dataframe.py @@ -223,7 +223,7 @@ def _set_reader_coord( if coord.stop is None: # There's no way to specify "to infinity" for strings. # We have to get the nonempty domain and use that as the end. - ned = self._handle.non_empty_domain() + ned = self._clib_handle.non_empty_domain() _, stop = ned[dim_idx] else: stop = coord.stop From 31fa2afdb88c1bbc6e6488ffac7854381e611e4b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 21:19:49 -0400 Subject: [PATCH 10/24] lint --- apis/python/src/tiledbsoma/_point_cloud_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apis/python/src/tiledbsoma/_point_cloud_dataframe.py b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py index 61af9917b2..ea29ce0b99 100644 --- a/apis/python/src/tiledbsoma/_point_cloud_dataframe.py +++ b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py @@ -284,7 +284,7 @@ def count(self) -> int: """Returns the number of rows in the dataframe.""" self._check_open_read() # if is it in read open mode, then it is a PointCloudDataFrameWrapper - return self._clib_handle.count + return cast(int, self._clib_handle.count) def read( self, From 7734202a8f7d7324b97585218d580b38e075534f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 21:22:46 -0400 Subject: [PATCH 11/24] lint --- apis/python/src/tiledbsoma/_common_nd_array.py | 6 +++--- apis/python/src/tiledbsoma/_point_cloud_dataframe.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apis/python/src/tiledbsoma/_common_nd_array.py b/apis/python/src/tiledbsoma/_common_nd_array.py index 8a21d7bc03..f83892b370 100644 --- a/apis/python/src/tiledbsoma/_common_nd_array.py +++ b/apis/python/src/tiledbsoma/_common_nd_array.py @@ -93,7 +93,7 @@ def shape(self) -> Tuple[int, ...]: Lifecycle: Maturing. """ - return cast(Tuple[int, ...], tuple(self._handle.shape)) + return cast(Tuple[int, ...], tuple(self._clib_handle.shape)) @property def maxshape(self) -> Tuple[int, ...]: @@ -104,7 +104,7 @@ def maxshape(self) -> Tuple[int, ...]: Lifecycle: Maturing. """ - return cast(Tuple[int, ...], tuple(self._handle.maxshape)) + return cast(Tuple[int, ...], tuple(self._clib_handle.maxshape)) @property def tiledbsoma_has_upgraded_shape(self) -> bool: @@ -115,7 +115,7 @@ def tiledbsoma_has_upgraded_shape(self) -> bool: Lifecycle: Maturing. """ - return self._handle.tiledbsoma_has_upgraded_shape + return cast(bool, self._clib_handle.tiledbsoma_has_upgraded_shape) @classmethod def _dim_capacity_and_extent( diff --git a/apis/python/src/tiledbsoma/_point_cloud_dataframe.py b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py index ea29ce0b99..d1c09d1dc1 100644 --- a/apis/python/src/tiledbsoma/_point_cloud_dataframe.py +++ b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py @@ -7,7 +7,7 @@ """ import warnings -from typing import Any, Optional, Sequence, Tuple, Union +from typing import Any, Optional, Sequence, Tuple, Union, cast import pyarrow as pa import somacore From 17684d8e0488c7f4d702ee5cce6f1c0a9b660b19 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 21:23:40 -0400 Subject: [PATCH 12/24] more --- apis/python/src/tiledbsoma/_soma_group.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/apis/python/src/tiledbsoma/_soma_group.py b/apis/python/src/tiledbsoma/_soma_group.py index c56fbba370..99c6b895d1 100644 --- a/apis/python/src/tiledbsoma/_soma_group.py +++ b/apis/python/src/tiledbsoma/_soma_group.py @@ -157,13 +157,12 @@ def _set_element( # TileDB groups currently do not support replacing elements. # If we use a hack to flush writes, corruption is possible. raise SOMAError(f"replacing key {key!r} is unsupported") - clib_collection = self._handle._handle relative_type = clib.URIType.relative if relative else clib.URIType.absolute - clib_collection.add( + self._clib_handle.add( uri=uri, uri_type=relative_type, name=key, - soma_type=clib_collection.type, + soma_type=self._clib_handle.type, ) self._contents[key] = _CachedElement( entry=_tdb_handles.GroupEntry(soma_object.uri, soma_object._wrapper_type), From 9fcf6b0ec124fcd17940398fc59ea6eb6b0fb43e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 23:49:33 -0400 Subject: [PATCH 13/24] push more methods out of _tdb_handles.py --- apis/python/src/tiledbsoma/_soma_array.py | 79 ++++++++++---- apis/python/src/tiledbsoma/_tdb_handles.py | 119 --------------------- apis/python/src/tiledbsoma/io/ingest.py | 4 +- 3 files changed, 60 insertions(+), 142 deletions(-) diff --git a/apis/python/src/tiledbsoma/_soma_array.py b/apis/python/src/tiledbsoma/_soma_array.py index ea5294684f..d4af841b77 100644 --- a/apis/python/src/tiledbsoma/_soma_array.py +++ b/apis/python/src/tiledbsoma/_soma_array.py @@ -3,8 +3,9 @@ # # Licensed under the MIT License. -from typing import Any, Optional, Sequence, Tuple +from typing import Any, List, Optional, Sequence, Tuple +import numpy as np import pyarrow as pa from somacore import options from typing_extensions import Self @@ -57,7 +58,11 @@ def schema(self) -> pa.Schema: Lifecycle: Maturing. """ - return self._handle.schema + return self._clib_handle.schema + + @property + def ndim(self) -> int: + return len(self._clib_handle.dimension_names) def config_options_from_schema(self) -> clib.PlatformConfig: """Returns metadata about the array that is not encompassed within the @@ -91,18 +96,7 @@ def config_options_from_schema(self) -> clib.PlatformConfig: * cell_order: str * consolidate_and_vacuum: bool """ - return self._handle.config_options_from_schema() - - def non_empty_domain(self) -> Tuple[Tuple[Any, Any], ...]: - """ - Retrieves the non-empty domain for each dimension, namely the smallest - and largest indices in each dimension for which the array/dataframe has - data occupied. This is nominally the same as the domain used at - creation time, but if for example only a portion of the available domain - has actually had data written, this function will return a tighter - range. - """ - return self._handle.non_empty_domain() + return self._clib_handle.config_options_from_schema() def _tiledb_array_keys(self) -> Tuple[str, ...]: """Return all dim and attr names.""" @@ -110,13 +104,56 @@ def _tiledb_array_keys(self) -> Tuple[str, ...]: def _tiledb_dim_names(self) -> Tuple[str, ...]: """Reads the dimension names from the schema: for example, ['obs_id', 'var_id'].""" - return self._handle.dim_names + return tuple(self._clib_handle.dimension_names) def _tiledb_attr_names(self) -> Tuple[str, ...]: """Reads the attribute names from the schema: for example, the list of column names in a dataframe. """ - return self._handle.attr_names + return self.attr_names + + @property + def dim_names(self) -> Tuple[str, ...]: + return tuple(self._clib_handle.dimension_names) + + @property + def attr_names(self) -> Tuple[str, ...]: + return tuple( + f.name + for f in self.schema + if f.name not in self._clib_handle.dimension_names + ) + + def _cast_domainish( + self, domainish: List[Any] + ) -> Tuple[Tuple[object, object], ...]: + result = [] + for i, slot in enumerate(domainish): + + arrow_type = slot[0].type + if pa.types.is_timestamp(arrow_type): + pandas_type = np.dtype(arrow_type.to_pandas_dtype()) + result.append( + tuple( + pandas_type.type(e.cast(pa.int64()).as_py(), arrow_type.unit) + for e in slot + ) + ) + else: + result.append(tuple(e.as_py() for e in slot)) + + return tuple(result) + + def non_empty_domain(self) -> Tuple[Tuple[Any, Any], ...]: + """ + Retrieves the non-empty domain for each dimension, namely the smallest + and largest indices in each dimension for which the array/dataframe has + data occupied. This is nominally the same as the domain used at + creation time, but if for example only a portion of the available domain + has actually had data written, this function will return a tighter + range. + """ + return self._cast_domainish(self._clib_handle.non_empty_domain()) def _domain(self) -> Tuple[Tuple[Any, Any], ...]: """This is the SOMA domain, not the core domain. @@ -131,7 +168,7 @@ def _domain(self) -> Tuple[Tuple[Any, Any], ...]: * Core current domain is new as of core 2.25 and can be resized up to core (max) domain. """ - return self._handle.domain + return self._cast_domainish(self._clib_handle.domain()) def _maxdomain(self) -> Tuple[Tuple[Any, Any], ...]: """This is the SOMA maxdomain, not the core domain. @@ -146,7 +183,7 @@ def _maxdomain(self) -> Tuple[Tuple[Any, Any], ...]: * Core current domain is new as of core 2.25 and can be resized up to core (max) domain. """ - return self._handle.maxdomain + return self._cast_domainish(self._clib_handle.maxdomain()) def _set_reader_coords(self, sr: clib.SOMAArray, coords: Sequence[object]) -> None: """Parses the given coords and sets them on the SOMA Reader.""" @@ -156,10 +193,10 @@ def _set_reader_coords(self, sr: clib.SOMAArray, coords: Sequence[object]) -> No " not str or bytes" ) - if len(coords) > self._handle.ndim: + if len(coords) > self.ndim: raise ValueError( f"coords ({len(coords)} elements) must be shorter than ndim" - f" ({self._handle.ndim})" + f" ({self.ndim})" ) for i, coord in enumerate(coords): dim = self.schema.field(i) @@ -190,7 +227,7 @@ def _set_reader_coord( if isinstance(coord, slice): _util.validate_slice(coord) try: - dom = self._handle.domain[dim_idx] + dom = self._domain()[dim_idx] lo_hi = _util.slice_to_numeric_range(coord, dom) except _util.NonNumericDimensionError: return False # We only handle numeric dimensions here. diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index 7930fd2c26..0ee17fa89b 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -15,7 +15,6 @@ Dict, Generic, Iterator, - List, Mapping, MutableMapping, Optional, @@ -372,128 +371,10 @@ def _do_initial_reads(self, reader: RawHandle) -> None: # non–attrs-managed field self.metadata = MetadataWrapper(self, dict(reader.meta)) - @property - def schema(self) -> pa.Schema: - return self._handle.schema - - def config_options_from_schema(self) -> clib.PlatformConfig: - return self._handle.config_options_from_schema() - @property def meta(self) -> "MetadataWrapper": return self.metadata - @property - def ndim(self) -> int: - return len(self._handle.dimension_names) - - def _cast_domainish( - self, domainish: List[Any] - ) -> Tuple[Tuple[object, object], ...]: - result = [] - for i, slot in enumerate(domainish): - - arrow_type = slot[0].type - if pa.types.is_timestamp(arrow_type): - pandas_type = np.dtype(arrow_type.to_pandas_dtype()) - result.append( - tuple( - pandas_type.type(e.cast(pa.int64()).as_py(), arrow_type.unit) - for e in slot - ) - ) - else: - result.append(tuple(e.as_py() for e in slot)) - - return tuple(result) - - @property - def domain(self) -> Tuple[Tuple[object, object], ...]: - return self._cast_domainish(self._handle.domain()) - - @property - def maxdomain(self) -> Tuple[Tuple[object, object], ...]: - return self._cast_domainish(self._handle.maxdomain()) - - def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]: - return self._cast_domainish(self._handle.non_empty_domain()) - - @property - def attr_names(self) -> Tuple[str, ...]: - return tuple( - f.name for f in self.schema if f.name not in self._handle.dimension_names - ) - - @property - def dim_names(self) -> Tuple[str, ...]: - return tuple(self._handle.dimension_names) - - @property - def shape(self) -> Tuple[int, ...]: - """Not implemented for DataFrame.""" - return cast(Tuple[int, ...], tuple(self._handle.shape)) - - @property - def maxshape(self) -> Tuple[int, ...]: - """Not implemented for DataFrame.""" - return cast(Tuple[int, ...], tuple(self._handle.maxshape)) - - @property - def maybe_soma_joinid_shape(self) -> Optional[int]: - """Only implemented for DataFrame.""" - raise NotImplementedError - - @property - def maybe_soma_joinid_maxshape(self) -> Optional[int]: - """Only implemented for DataFrame.""" - raise NotImplementedError - - @property - def tiledbsoma_has_upgraded_shape(self) -> bool: - """Not implemented for DataFrame.""" - raise NotImplementedError - - @property - def tiledbsoma_has_upgraded_domain(self) -> bool: - """Only implemented for DataFrame.""" - raise NotImplementedError - - def resize(self, newshape: Sequence[Union[int, None]]) -> None: - """Not implemented for DataFrame.""" - raise NotImplementedError - - def tiledbsoma_can_resize( - self, newshape: Sequence[Union[int, None]] - ) -> StatusAndReason: - """Not implemented for DataFrame.""" - raise NotImplementedError - - def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: - """Not implemented for DataFrame.""" - raise NotImplementedError - - def tiledbsoma_can_upgrade_shape( - self, newshape: Sequence[Union[int, None]] - ) -> StatusAndReason: - """Not implemented for DataFrame.""" - raise NotImplementedError - - def resize_soma_joinid_shape(self, newshape: int) -> None: - """Only implemented for DataFrame.""" - raise NotImplementedError - - def can_resize_soma_joinid_shape(self, newshape: int) -> StatusAndReason: - """Only implemented for DataFrame.""" - raise NotImplementedError - - def upgrade_soma_joinid_shape(self, newshape: int) -> None: - """Only implemented for DataFrame.""" - raise NotImplementedError - - def can_upgrade_soma_joinid_shape(self, newshape: int) -> StatusAndReason: - """Only implemented for DataFrame.""" - raise NotImplementedError - class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]): """Wrapper around a Pybind11 SOMADataFrame handle.""" diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 81fb1d80f5..d7640ef97b 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -1931,7 +1931,7 @@ def _write_matrix_to_denseNDArray( def _read_nonempty_domain(arr: SOMAArray) -> Any: try: - return arr._handle.non_empty_domain() + return arr.non_empty_domain() except (SOMAError, RuntimeError): # This means that we're open in write-only mode. # Reopen the array in read mode. @@ -1940,7 +1940,7 @@ def _read_nonempty_domain(arr: SOMAArray) -> Any: cls = type(arr) with cls.open(arr.uri, "r", platform_config=None, context=arr.context) as readarr: - return readarr._handle.non_empty_domain() + return readarr.non_empty_domain() def _find_sparse_chunk_size( From b9caa518c393e9c1adb34cd22066534763f6def3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 23:52:40 -0400 Subject: [PATCH 14/24] remove no-longer-used methods from _tdb_handles.py --- apis/python/src/tiledbsoma/_tdb_handles.py | 78 ---------------------- 1 file changed, 78 deletions(-) diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index 0ee17fa89b..9eccf673cf 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -388,84 +388,6 @@ def count(self) -> int: def write(self, values: pa.RecordBatch) -> None: self._handle.write(values) - @property - def maybe_soma_joinid_shape(self) -> Optional[int]: - """Return the shape slot for the soma_joinid dim, if the array has one. - This is an important test-point and dev-internal access-point, - in particular, for the tiledbsoma-io experiment-level resizer. - - Lifecycle: - Maturing. - """ - return cast(Optional[int], self._handle.maybe_soma_joinid_shape) - - @property - def maybe_soma_joinid_maxshape(self) -> Optional[int]: - """Return the maxshape slot for the soma_joinid dim, if the array has one. - This is an important test-point and dev-internal access-point, - in particular, for the tiledbsoma-io experiment-level resizer. - - Lifecycle: - Maturing. - """ - return cast(Optional[int], self._handle.maybe_soma_joinid_maxshape) - - @property - def tiledbsoma_has_upgraded_domain(self) -> bool: - """Returns true if the array has the upgraded resizeable domain feature - from TileDB-SOMA 1.15: the array was created with this support, or it has - had ``.tiledbsoma_upgrade_domain`` applied to it. - - Lifecycle: - Maturing. - """ - return cast(bool, self._handle.tiledbsoma_has_upgraded_domain) - - def resize_soma_joinid_shape(self, newshape: int) -> None: - """Increases the shape of the dataframe on the ``soma_joinid`` index - column, if it indeed is an index column, leaving all other index columns - as-is. If the ``soma_joinid`` is not an index column, no change is made. - This is a special case of ``upgrade_domain`` (WIP for 1.15), but simpler - to keystroke, and handles the most common case for dataframe domain - expansion. Raises an error if the dataframe doesn't already have a - domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for - 1.15). - """ - self._handle.resize_soma_joinid_shape(newshape) - - def can_resize_soma_joinid_shape(self, newshape: int) -> StatusAndReason: - """Increases the shape of the dataframe on the ``soma_joinid`` index - column, if it indeed is an index column, leaving all other index columns - as-is. If the ``soma_joinid`` is not an index column, no change is made. - This is a special case of ``upgrade_domain`` (WIP for 1.15), but simpler - to keystroke, and handles the most common case for dataframe domain - expansion. Raises an error if the dataframe doesn't already have a - domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for - 1.15). If ``check_only`` is ``True``, returns whether the operation - would succeed if attempted, and a reason why it would not. - """ - return cast( - StatusAndReason, self._handle.can_resize_soma_joinid_shape(newshape) - ) - - def upgrade_soma_joinid_shape(self, newshape: int) -> None: - """This is like ``upgrade_domain``, but it only applies the specified domain - update to the ``soma_joinid`` index column. Any other index columns have their - domain set to match the maxdomain. If the ``soma_joinid`` column is not an index - column at all, then no action is taken.""" - self._handle.upgrade_soma_joinid_shape(newshape) - - def can_upgrade_soma_joinid_shape(self, newshape: int) -> StatusAndReason: - """This allows you to see if ``upgrade_soma_joinid_shape`` will succeed - before calling it. This is an important test-point and dev-internal - access-point, in particular, for the tiledbsoma-io experiment-level - resizer. If ``check_only`` is ``True``, returns whether the operation - would succeed if attempted, and a reason why it would not. - """ - return cast( - StatusAndReason, self._handle.can_upgrade_soma_joinid_shape(newshape) - ) - class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]): """Wrapper around a Pybind11 SOMAPointCloudDataFrame handle.""" From 908c93ea02bc6ef92deef596f7b0a1625ad2f5a8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Oct 2024 23:54:19 -0400 Subject: [PATCH 15/24] remove no-longer-used methods from _tdb_handles.py --- apis/python/src/tiledbsoma/_tdb_handles.py | 94 +--------------------- 1 file changed, 1 insertion(+), 93 deletions(-) diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index 9eccf673cf..bc4e5c0ac5 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -18,7 +18,6 @@ Mapping, MutableMapping, Optional, - Sequence, Tuple, Type, TypeVar, @@ -34,7 +33,7 @@ from . import pytiledbsoma as clib from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error -from ._types import METADATA_TYPES, Metadatum, OpenTimestamp, StatusAndReason +from ._types import METADATA_TYPES, Metadatum, OpenTimestamp from .options._soma_tiledb_context import SOMATileDBContext RawHandle = Union[ @@ -394,109 +393,18 @@ class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]) _ARRAY_WRAPPED_TYPE = clib.SOMAPointCloudDataFrame - @property - def count(self) -> int: - return int(self._handle.count) - - def write(self, values: pa.RecordBatch) -> None: - self._handle.write(values) - class DenseNDArrayWrapper(SOMAArrayWrapper[clib.SOMADenseNDArray]): """Wrapper around a Pybind11 DenseNDArrayWrapper handle.""" _ARRAY_WRAPPED_TYPE = clib.SOMADenseNDArray - @property - def tiledbsoma_has_upgraded_shape(self) -> bool: - """Returns true if the array has the upgraded resizeable shape feature - from TileDB-SOMA 1.15: the array was created with this support, or it has - had ``.tiledbsoma_upgrade_shape`` applied to it. - - Lifecycle: - Maturing. - """ - return cast(bool, self._handle.tiledbsoma_has_upgraded_shape) - - def resize(self, newshape: Sequence[Union[int, None]]) -> None: - """Supported for ``SparseNDArray``; scheduled for implementation for - ``DenseNDArray`` in TileDB-SOMA 1.15 - """ - if clib.embedded_version_triple() >= (2, 27, 0): - self._handle.resize(newshape) - else: - raise NotImplementedError("Not implemented for libtiledbsoma < 2.27.0") - - def tiledbsoma_can_resize( - self, newshape: Sequence[Union[int, None]] - ) -> StatusAndReason: - """Supported for ``SparseNDArray``; scheduled for implementation for - ``DenseNDArray`` in TileDB-SOMA 1.15. - """ - if clib.embedded_version_triple() >= (2, 27, 0): - return cast(StatusAndReason, self._handle.tiledbsoma_can_resize(newshape)) - else: - raise NotImplementedError("Not implemented for libtiledbsoma < 2.27.0") - class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]): """Wrapper around a Pybind11 SparseNDArrayWrapper handle.""" _ARRAY_WRAPPED_TYPE = clib.SOMASparseNDArray - @property - def nnz(self) -> int: - return int(self._handle.nnz()) - - @property - def tiledbsoma_has_upgraded_shape(self) -> bool: - """Returns true if the array has the upgraded resizeable shape feature - from TileDB-SOMA 1.15: the array was created with this support, or it has - had ``.tiledbsoma_upgrade_shape`` applied to it. - - Lifecycle: - Maturing. - """ - return cast(bool, self._handle.tiledbsoma_has_upgraded_shape) - - def resize(self, newshape: Sequence[Union[int, None]]) -> None: - """Increases the shape of the array as specfied. Raises an error if the new - shape is less than the current shape in any dimension. Raises an error if - the new shape exceeds maxshape in any dimension. Raises an error if the - array doesn't already have a shape: in that case please call - tiledbsoma_upgrade_shape. - """ - self._handle.resize(newshape) - - def tiledbsoma_can_resize( - self, newshape: Sequence[Union[int, None]] - ) -> StatusAndReason: - """This allows you to see if ``resize`` will succeed before calling it. - This is an important test-point and dev-internal access-point, in - particular, for the tiledbsoma-io experiment-level resizer. If - ``check_only`` is ``True``, returns whether the operation would succeed - if attempted, and a reason why it would not. - """ - return cast(StatusAndReason, self._handle.can_resize(newshape)) - - def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: - """Allows the array to have a resizeable shape as described in the TileDB-SOMA - 1.15 release notes. Raises an error if the new shape exceeds maxshape in - any dimension. Raises an error if the array already has a shape. - """ - self._handle.tiledbsoma_upgrade_shape(newshape) - - def tiledbsoma_can_upgrade_shape( - self, newshape: Sequence[Union[int, None]] - ) -> StatusAndReason: - """Allows the array to have a resizeable shape as described in the TileDB-SOMA - 1.15 release notes. Raises an error if the new shape exceeds maxshape in - any dimension. Raises an error if the array already has a shape. - """ - return cast( - StatusAndReason, self._handle.tiledbsoma_can_upgrade_shape(newshape) - ) - class _DictMod(enum.Enum): """State machine to keep track of modifications to a dictionary. From 323d87592755ed4dac64c5320fa0b10bb98d1178 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 00:02:44 -0400 Subject: [PATCH 16/24] remove no-longer-used methods from _tdb_handles.py --- apis/python/src/tiledbsoma/_tdb_handles.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index bc4e5c0ac5..b1eae4b7e4 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -27,7 +27,6 @@ import attrs import numpy as np -import pyarrow as pa from somacore import options from typing_extensions import Literal, Self @@ -380,13 +379,6 @@ class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]): _ARRAY_WRAPPED_TYPE = clib.SOMADataFrame - @property - def count(self) -> int: - return int(self._handle.count) - - def write(self, values: pa.RecordBatch) -> None: - self._handle.write(values) - class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]): """Wrapper around a Pybind11 SOMAPointCloudDataFrame handle.""" From b93d0924c2d4d05d81f67d3ad58fae142912eeb4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 00:08:17 -0400 Subject: [PATCH 17/24] internal name-neaten --- apis/python/src/tiledbsoma/_tdb_handles.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index b1eae4b7e4..f98fe1684f 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -259,13 +259,13 @@ def from_soma_group_entry(cls, obj: Tuple[str, str]) -> "GroupEntry": raise SOMAError(f"internal error: unknown object type {uri}") -_GrpType = TypeVar("_GrpType", bound=clib.SOMAGroup) +_ClibGroupType = TypeVar("_ClibGroupType", bound=clib.SOMAGroup) -class SOMAGroupWrapper(Wrapper[_GrpType]): +class SOMAGroupWrapper(Wrapper[_ClibGroupType]): """Base class for Pybind11 SOMAGroupWrapper handles.""" - _GROUP_WRAPPED_TYPE: Type[_GrpType] + _GROUP_WRAPPED_TYPE: Type[_ClibGroupType] clib_type = "SOMAGroup" @@ -331,13 +331,13 @@ class SceneWrapper(SOMAGroupWrapper[clib.SOMAScene]): _GROUP_WRAPPED_TYPE = clib.SOMAScene -_ArrType = TypeVar("_ArrType", bound=clib.SOMAArray) +_CLibArrayType = TypeVar("_CLibArrayType", bound=clib.SOMAArray) -class SOMAArrayWrapper(Wrapper[_ArrType]): +class SOMAArrayWrapper(Wrapper[_CLibArrayType]): """Base class for Pybind11 SOMAArrayWrapper handles.""" - _ARRAY_WRAPPED_TYPE: Type[_ArrType] + _ARRAY_WRAPPED_TYPE: Type[_CLibArrayType] clib_type = "SOMAArray" From d411c3983fdb1d0a78e84d89033c2fabbb49b45f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 00:27:21 -0400 Subject: [PATCH 18/24] internal name-neatens --- apis/python/src/tiledbsoma/_tdb_handles.py | 26 +++++++++++----------- apis/python/src/tiledbsoma/io/ingest.py | 6 ++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index f98fe1684f..fc960ab213 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -35,7 +35,7 @@ from ._types import METADATA_TYPES, Metadatum, OpenTimestamp from .options._soma_tiledb_context import SOMATileDBContext -RawHandle = Union[ +CLibHandle = Union[ clib.SOMAArray, clib.SOMADataFrame, clib.SOMAPointCloudDataFrame, @@ -48,8 +48,8 @@ clib.SOMAScene, clib.SOMAMultiscaleImage, ] -_RawHdl_co = TypeVar("_RawHdl_co", bound=RawHandle, covariant=True) -"""A raw TileDB object. Covariant because Handles are immutable enough.""" +_CLibHandle_co = TypeVar("_CLibHandle_co", bound=CLibHandle, covariant=True) +"""A handle to a pybind11-managed libtiledbsoma object. Covariant because Handles are immutable enough.""" def open( @@ -58,7 +58,7 @@ def open( context: SOMATileDBContext, timestamp: Optional[OpenTimestamp], clib_type: Optional[str] = None, -) -> "Wrapper[RawHandle]": +) -> "Wrapper[CLibHandle]": """Determine whether the URI is an array or group, and open it.""" open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write @@ -100,7 +100,7 @@ def open( @attrs.define(eq=False, hash=False, slots=False) -class Wrapper(Generic[_RawHdl_co], metaclass=abc.ABCMeta): +class Wrapper(Generic[_CLibHandle_co], metaclass=abc.ABCMeta): """Wrapper for TileDB handles to manage lifecycle and metadata. Callers may read and use (non-underscored) members but should never set @@ -111,7 +111,7 @@ class Wrapper(Generic[_RawHdl_co], metaclass=abc.ABCMeta): mode: options.OpenMode context: SOMATileDBContext timestamp_ms: int - _handle: _RawHdl_co + _handle: _CLibHandle_co closed: bool = attrs.field(default=False, init=False) clib_type: Optional[str] = None @@ -170,7 +170,7 @@ def _opener( mode: options.OpenMode, context: SOMATileDBContext, timestamp: int, - ) -> _RawHdl_co: + ) -> _CLibHandle_co: """Opens and returns a TileDB object specific to this type.""" raise NotImplementedError() @@ -188,7 +188,7 @@ def reopen( # Covariant types should normally not be in parameters, but this is for # internal use only so it's OK. - def _do_initial_reads(self, reader: _RawHdl_co) -> None: # type: ignore[misc] + def _do_initial_reads(self, reader: _CLibHandle_co) -> None: # type: ignore[misc] """Final setup step before returning the Handle. This is passed a raw TileDB object opened in read mode, since writers @@ -198,7 +198,7 @@ def _do_initial_reads(self, reader: _RawHdl_co) -> None: # type: ignore[misc] self.metadata = MetadataWrapper(self, dict(reader.meta)) @property - def reader(self) -> _RawHdl_co: + def reader(self) -> _CLibHandle_co: """Accessor to assert that you are working in read mode.""" if self.closed: raise SOMAError(f"{self} is closed") @@ -207,7 +207,7 @@ def reader(self) -> _RawHdl_co: raise SOMAError(f"cannot read from {self}; it is open for writing") @property - def writer(self) -> _RawHdl_co: + def writer(self) -> _CLibHandle_co: """Accessor to assert that you are working in write mode.""" if self.closed: raise SOMAError(f"{self} is closed") @@ -240,7 +240,7 @@ def __del__(self) -> None: self.close() -AnyWrapper = Wrapper[RawHandle] +AnyWrapper = Wrapper[CLibHandle] """Non-instantiable type representing any Handle.""" @@ -360,7 +360,7 @@ def _opener( timestamp=(0, timestamp), ) - def _do_initial_reads(self, reader: RawHandle) -> None: + def _do_initial_reads(self, reader: CLibHandle) -> None: """Final setup step before returning the Handle. This is passed a raw TileDB object opened in read mode, since writers @@ -459,7 +459,7 @@ class MetadataWrapper(MutableMapping[str, Any]): through to the backing store and the cache is updated to match. """ - owner: Wrapper[RawHandle] + owner: Wrapper[CLibHandle] cache: Dict[str, Any] _mods: Dict[str, "_DictMod"] = attrs.field(init=False, factory=dict) """Tracks the modifications we have made to cache entries.""" diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index d7640ef97b..a4e2af67d8 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -67,7 +67,7 @@ from .._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED from .._soma_array import SOMAArray from .._soma_object import AnySOMAObject, SOMAObject -from .._tdb_handles import RawHandle +from .._tdb_handles import CLibHandle from .._types import ( _INGEST_MODES, INGEST_MODES, @@ -107,7 +107,7 @@ from ._util import get_arrow_str_format, read_h5ad _NDArr = TypeVar("_NDArr", bound=NDArray) -_TDBO = TypeVar("_TDBO", bound=SOMAObject[RawHandle]) +_TDBO = TypeVar("_TDBO", bound=SOMAObject[CLibHandle]) def add_metadata(obj: SOMAObject[Any], additional_metadata: AdditionalMetadata) -> None: @@ -1762,7 +1762,7 @@ def add_matrix_to_collection( coll_uri = f"{meas.uri}/{collection_name}" if collection_name in meas: - coll = cast(Collection[RawHandle], meas[collection_name]) + coll = cast(Collection[CLibHandle], meas[collection_name]) else: coll = _create_or_open_collection( Collection, From 0d5a2edbcf604ee549cbc85604d11e4b8e4a7bff Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 09:59:07 -0400 Subject: [PATCH 19/24] CLibHandle to _soma_object.py --- apis/python/src/tiledbsoma/_soma_object.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/apis/python/src/tiledbsoma/_soma_object.py b/apis/python/src/tiledbsoma/_soma_object.py index ec153ed6c0..d155fdbb40 100644 --- a/apis/python/src/tiledbsoma/_soma_object.py +++ b/apis/python/src/tiledbsoma/_soma_object.py @@ -12,6 +12,7 @@ from typing_extensions import Self from . import _constants, _tdb_handles +from . import pytiledbsoma as clib from ._exception import SOMAError from ._types import OpenTimestamp from ._util import check_type, ms_to_datetime @@ -27,6 +28,20 @@ Covariant because ``_handle`` is read-only. """ +CLibHandle = Union[ + clib.SOMAArray, + clib.SOMADataFrame, + clib.SOMAPointCloudDataFrame, + clib.SOMASparseNDArray, + clib.SOMADenseNDArray, + clib.SOMAGroup, + clib.SOMACollection, + clib.SOMAMeasurement, + clib.SOMAExperiment, + clib.SOMAScene, + clib.SOMAMultiscaleImage, +] + class SOMAObject(somacore.SOMAObject, Generic[_WrapperType_co]): """Base class for all TileDB SOMA objects. @@ -94,6 +109,8 @@ def open( Lifecycle: Maturing. """ + if mode not in ("r", "w"): + raise ValueError(f"Invalid open mode {mode!r}") del platform_config # unused context = _validate_soma_tiledb_context(context) handle = _tdb_handles.open( From c4928048cd9cb593b6cff5f4f5e2a00c6d4afa90 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 11:33:57 -0400 Subject: [PATCH 20/24] move init of clib_handle --- apis/python/src/tiledbsoma/_collection.py | 1 + apis/python/src/tiledbsoma/_dataframe.py | 11 +++++++++ apis/python/src/tiledbsoma/_dense_nd_array.py | 1 + apis/python/src/tiledbsoma/_experiment.py | 2 ++ .../src/tiledbsoma/_geometry_dataframe.py | 1 + apis/python/src/tiledbsoma/_measurement.py | 2 ++ .../src/tiledbsoma/_multiscale_image.py | 1 + .../src/tiledbsoma/_point_cloud_dataframe.py | 1 + apis/python/src/tiledbsoma/_scene.py | 2 ++ apis/python/src/tiledbsoma/_soma_object.py | 23 ++++++++++++++++++- .../python/src/tiledbsoma/_sparse_nd_array.py | 1 + .../src/tiledbsoma/_spatial_dataframe.py | 1 + 12 files changed, 46 insertions(+), 1 deletion(-) diff --git a/apis/python/src/tiledbsoma/_collection.py b/apis/python/src/tiledbsoma/_collection.py index 1e486ab002..b9b5d80f18 100644 --- a/apis/python/src/tiledbsoma/_collection.py +++ b/apis/python/src/tiledbsoma/_collection.py @@ -519,6 +519,7 @@ class Collection( # type: ignore[misc] # __eq__ false positive __slots__ = () _wrapper_type = _tdb_handles.CollectionWrapper + _clib_handle_type = clib.SOMACollection @typeguard_ignore diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index e188ef8d62..5d182f54e5 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -136,6 +136,9 @@ class DataFrame(SOMAArray, somacore.DataFrame): """ _wrapper_type = DataFrameWrapper + _clib_handle_type = clib.SOMADataFrame + + """XXX comment me.""" @classmethod def open( @@ -354,8 +357,16 @@ def create( raise map_exception_for_create(e, uri) from None handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp) + clib_handle = cls._clib_handle_type.open( + uri, + clib.OpenMode.write, + context.native_context, + timestamp=(0, timestamp_ms), + ) + retval = cls( handle, + clib_handle=clib_handle, _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", ) diff --git a/apis/python/src/tiledbsoma/_dense_nd_array.py b/apis/python/src/tiledbsoma/_dense_nd_array.py index a7ac342fc2..6d1b875682 100644 --- a/apis/python/src/tiledbsoma/_dense_nd_array.py +++ b/apis/python/src/tiledbsoma/_dense_nd_array.py @@ -85,6 +85,7 @@ class DenseNDArray(NDArray, somacore.DenseNDArray): __slots__ = () _wrapper_type = DenseNDArrayWrapper + _clib_handle_type = clib.SOMADenseNDArray @classmethod def create( diff --git a/apis/python/src/tiledbsoma/_experiment.py b/apis/python/src/tiledbsoma/_experiment.py index 5a57662761..b54068aca7 100644 --- a/apis/python/src/tiledbsoma/_experiment.py +++ b/apis/python/src/tiledbsoma/_experiment.py @@ -12,6 +12,7 @@ from typing_extensions import Self from . import _tdb_handles +from . import pytiledbsoma as clib from ._collection import Collection, CollectionBase from ._dataframe import DataFrame from ._indexer import IntIndexer @@ -69,6 +70,7 @@ class Experiment( # type: ignore[misc] # __eq__ false positive __slots__ = () _wrapper_type = _tdb_handles.ExperimentWrapper + _clib_handle_type = clib.SOMAExperiment _subclass_constrained_soma_types = { "obs": ("SOMADataFrame",), diff --git a/apis/python/src/tiledbsoma/_geometry_dataframe.py b/apis/python/src/tiledbsoma/_geometry_dataframe.py index 6d137ae6e3..eeb834c793 100644 --- a/apis/python/src/tiledbsoma/_geometry_dataframe.py +++ b/apis/python/src/tiledbsoma/_geometry_dataframe.py @@ -35,6 +35,7 @@ class GeometryDataFrame(somacore.GeometryDataFrame): """ __slots__ = () + # XXX _clib_handle_type = clib.XXX # Lifecycle diff --git a/apis/python/src/tiledbsoma/_measurement.py b/apis/python/src/tiledbsoma/_measurement.py index 6edaff0940..4a6d064e41 100644 --- a/apis/python/src/tiledbsoma/_measurement.py +++ b/apis/python/src/tiledbsoma/_measurement.py @@ -11,6 +11,7 @@ from somacore import measurement from . import _tdb_handles +from . import pytiledbsoma as clib from ._collection import Collection, CollectionBase from ._dataframe import DataFrame from ._dense_nd_array import DenseNDArray @@ -72,6 +73,7 @@ class Measurement( # type: ignore[misc] # __eq__ false positive __slots__ = () _wrapper_type = _tdb_handles.MeasurementWrapper + _clib_handle_type = clib.SOMAMeasurement _subclass_constrained_soma_types = { "var": ("SOMADataFrame",), diff --git a/apis/python/src/tiledbsoma/_multiscale_image.py b/apis/python/src/tiledbsoma/_multiscale_image.py index f60418a638..860c50d019 100644 --- a/apis/python/src/tiledbsoma/_multiscale_image.py +++ b/apis/python/src/tiledbsoma/_multiscale_image.py @@ -116,6 +116,7 @@ class MultiscaleImage( # type: ignore[misc] # __eq__ false positive __slots__ = ("_schema", "_coord_space", "_levels") _wrapper_type = _tdb_handles.MultiscaleImageWrapper + _clib_handle_type = clib.SOMAMultiscaleImage _level_prefix: Final = "soma_level_" diff --git a/apis/python/src/tiledbsoma/_point_cloud_dataframe.py b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py index d1c09d1dc1..bfebb700ad 100644 --- a/apis/python/src/tiledbsoma/_point_cloud_dataframe.py +++ b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py @@ -64,6 +64,7 @@ class PointCloudDataFrame(SpatialDataFrame, somacore.PointCloudDataFrame): __slots__ = ("_coord_space",) _wrapper_type = PointCloudDataFrameWrapper + _clib_handle_type = clib.SOMAPointCloudDataFrame @classmethod def create( diff --git a/apis/python/src/tiledbsoma/_scene.py b/apis/python/src/tiledbsoma/_scene.py index ef931d9ae5..4f84622251 100644 --- a/apis/python/src/tiledbsoma/_scene.py +++ b/apis/python/src/tiledbsoma/_scene.py @@ -16,6 +16,7 @@ ) from . import _funcs, _tdb_handles +from . import pytiledbsoma as clib from ._collection import Collection, CollectionBase from ._constants import SOMA_COORDINATE_SPACE_METADATA_KEY from ._exception import SOMAError @@ -46,6 +47,7 @@ class Scene( # type: ignore[misc] # __eq__ false positive __slots__ = ("_coord_space",) _wrapper_type = _tdb_handles.SceneWrapper + _clib_handle_type = clib.SOMAScene _subclass_constrained_soma_types = { "img": ("SOMACollection",), diff --git a/apis/python/src/tiledbsoma/_soma_object.py b/apis/python/src/tiledbsoma/_soma_object.py index d155fdbb40..dc290bce80 100644 --- a/apis/python/src/tiledbsoma/_soma_object.py +++ b/apis/python/src/tiledbsoma/_soma_object.py @@ -66,6 +66,21 @@ class SOMAObject(somacore.SOMAObject, Generic[_WrapperType_co]): ] """Class variable of the Wrapper class used to open this object type.""" + _clib_handle_type: Union[ + Type[clib.SOMAArray], + Type[clib.SOMADataFrame], + Type[clib.SOMAPointCloudDataFrame], + Type[clib.SOMASparseNDArray], + Type[clib.SOMADenseNDArray], + Type[clib.SOMAGroup], + Type[clib.SOMACollection], + Type[clib.SOMAMeasurement], + Type[clib.SOMAExperiment], + Type[clib.SOMAScene], + Type[clib.SOMAMultiscaleImage], + ] + """XXX comment me.""" + __slots__ = ("_close_stack", "_handle", "_clib_handle") @classmethod @@ -132,6 +147,7 @@ def __init__( _tdb_handles.SparseNDArrayWrapper, ], *, + clib_handle: Any = None, # XXX TEMP _dont_call_this_use_create_or_open_instead: str = "unset", ): """Internal-only common initializer steps. @@ -159,7 +175,12 @@ def __init__( f" internal use only." ) self._handle = handle - self._clib_handle = handle._handle + + if clib_handle is None: + self._clib_handle = handle._handle + else: + self._clib_handle = clib_handle + self._close_stack.enter_context(self._handle) def reopen( diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index 13878818d5..5b5c563996 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -106,6 +106,7 @@ class SparseNDArray(NDArray, somacore.SparseNDArray): __slots__ = () _wrapper_type = SparseNDArrayWrapper + _clib_handle_type = clib.SOMASparseNDArray # Inherited from somacore # * ndim accessor diff --git a/apis/python/src/tiledbsoma/_spatial_dataframe.py b/apis/python/src/tiledbsoma/_spatial_dataframe.py index 0e8f8136ec..abfba65e82 100644 --- a/apis/python/src/tiledbsoma/_spatial_dataframe.py +++ b/apis/python/src/tiledbsoma/_spatial_dataframe.py @@ -26,6 +26,7 @@ class SpatialDataFrame(SOMAArray): __slots__ = () + # XXX _clib_handle_type = clib.XXX def keys(self) -> Tuple[str, ...]: """Returns the names of the columns when read back as a spatial dataframe. From 210ce72ec3fa397c00aa5a451bcd49776159af9d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 18:42:21 -0400 Subject: [PATCH 21/24] move more inits of _clib_handle --- apis/python/src/tiledbsoma/_collection.py | 1 + apis/python/src/tiledbsoma/_dense_nd_array.py | 7 +++++++ apis/python/src/tiledbsoma/_multiscale_image.py | 10 ++++++++++ apis/python/src/tiledbsoma/_sparse_nd_array.py | 7 +++++++ 4 files changed, 25 insertions(+) diff --git a/apis/python/src/tiledbsoma/_collection.py b/apis/python/src/tiledbsoma/_collection.py index b9b5d80f18..d0e1448081 100644 --- a/apis/python/src/tiledbsoma/_collection.py +++ b/apis/python/src/tiledbsoma/_collection.py @@ -115,6 +115,7 @@ def create( timestamp=(0, timestamp_ms), ) handle = wrapper.open(uri, "w", context, tiledb_timestamp) + return cls( handle, _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", diff --git a/apis/python/src/tiledbsoma/_dense_nd_array.py b/apis/python/src/tiledbsoma/_dense_nd_array.py index 6d1b875682..e6fc85eb7e 100644 --- a/apis/python/src/tiledbsoma/_dense_nd_array.py +++ b/apis/python/src/tiledbsoma/_dense_nd_array.py @@ -163,8 +163,15 @@ def create( raise map_exception_for_create(e, uri) from None handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp) + clib_handle = cls._clib_handle_type.open( + uri, + clib.OpenMode.write, + context.native_context, + timestamp=(0, timestamp_ms), + ) return cls( handle, + clib_handle=clib_handle, _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", ) diff --git a/apis/python/src/tiledbsoma/_multiscale_image.py b/apis/python/src/tiledbsoma/_multiscale_image.py index 860c50d019..0de1386bfd 100644 --- a/apis/python/src/tiledbsoma/_multiscale_image.py +++ b/apis/python/src/tiledbsoma/_multiscale_image.py @@ -198,8 +198,18 @@ def create( ) handle.metadata[SOMA_MULTISCALE_IMAGE_SCHEMA] = schema_str handle.metadata[SOMA_COORDINATE_SPACE_METADATA_KEY] = coord_space_str + # XXX + # clib_handle = cls._clib_handle_type.open( + # uri, + # mode=clib.OpenMode.write, + # context=context.native_context, + # timestamp=(0, timestamp_ms), + # ) + return cls( handle, + # XXX + # clib_handle=clib_handle, _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", ) except SOMAError as e: diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index 5b5c563996..2161d14ccb 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -215,8 +215,15 @@ def create( raise map_exception_for_create(e, uri) from None handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp) + clib_handle = cls._clib_handle_type.open( + uri, + clib.OpenMode.write, + context.native_context, + timestamp=(0, timestamp_ms), + ) return cls( handle, + clib_handle=clib_handle, _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", ) From 2ce67296376db4e04acaf4a40e7bc09e462d95c4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 18:46:36 -0400 Subject: [PATCH 22/24] split out _metadata_wrapper.py from _tdb_handles.py --- .../src/tiledbsoma/_metadata_wrapper.py | 177 ++++++++++++++++++ apis/python/src/tiledbsoma/_tdb_handles.py | 149 +-------------- 2 files changed, 180 insertions(+), 146 deletions(-) create mode 100644 apis/python/src/tiledbsoma/_metadata_wrapper.py diff --git a/apis/python/src/tiledbsoma/_metadata_wrapper.py b/apis/python/src/tiledbsoma/_metadata_wrapper.py new file mode 100644 index 0000000000..e212aa5734 --- /dev/null +++ b/apis/python/src/tiledbsoma/_metadata_wrapper.py @@ -0,0 +1,177 @@ +# Copyright (c) 2021-2024 The Chan Zuckerberg Initiative Foundation +# Copyright (c) 2021-2024 TileDB, Inc. +# +# Licensed under the MIT License. + +"""Abstractions to more easily manage read and write access to TileDB data. + +``open``, ``ArrayWrapper.open``, ``GroupWrapper.open`` are the important parts. +""" + +import abc +import enum +from typing import ( + Any, + Dict, + Generic, + Iterator, + Mapping, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, + cast, +) + +import attrs +import numpy as np +from somacore import options +from typing_extensions import Literal, Self + +from . import pytiledbsoma as clib +from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error +from ._types import METADATA_TYPES, Metadatum, OpenTimestamp +from .options._soma_tiledb_context import SOMATileDBContext + +class _DictMod(enum.Enum): + """State machine to keep track of modifications to a dictionary. + + This whole thing is a hack to allow users to treat the metadata dict + like an actual dictionary because tiledb currently does not support multiple + modifications to the same key (e.g., add-then-delete a metadata entry has + undesired results) [sc-25089]. + """ + + # Initially-absent keys are either added or not (added then removed). + ABSENT = enum.auto() + """The key is not present in the dict. Initial state.""" + ADDED = enum.auto() + """The key was originally ABSENT but has been added.""" + + # Initially-present keys can be either updated or deleted. + PRESENT = enum.auto() + """The key is in the dict and is unchanged. Initial state.""" + UPDATED = enum.auto() + """The key was originally PRESENT but has been changed.""" + DELETED = enum.auto() + """The key was originally PRESENT but has been deleted.""" + + @classmethod + def start_state(cls, dct: Mapping[Any, Any], key: Any) -> "_DictMod": + """Returns the starting state for a DictMod given the key of dct.""" + return cls.PRESENT if key in dct else cls.ABSENT + + def next_state(self, action: Literal["set", "del"]) -> "_DictMod": + """Determines the next state of an entry given the action.""" + return { + _DictMod.ABSENT: { + "set": _DictMod.ADDED, + }, + _DictMod.ADDED: { + "set": _DictMod.ADDED, + "del": _DictMod.ABSENT, + }, + _DictMod.PRESENT: { + "set": _DictMod.UPDATED, + "del": _DictMod.DELETED, + }, + _DictMod.UPDATED: { + "set": _DictMod.UPDATED, + "del": _DictMod.DELETED, + }, + _DictMod.DELETED: { + "set": _DictMod.UPDATED, + }, + }[self][action] + + +@attrs.define(frozen=True) +class MetadataWrapper(MutableMapping[str, Any]): + """A wrapper storing the metadata of some TileDB object. + + Because the view of metadata does not change after open time, we immediately + cache all of it and use that to handle all reads. Writes are then proxied + through to the backing store and the cache is updated to match. + """ + + # XXX TEMP + # owner: Wrapper[CLibHandle] + owner: Any + + cache: Dict[str, Any] + _mods: Dict[str, "_DictMod"] = attrs.field(init=False, factory=dict) + """Tracks the modifications we have made to cache entries.""" + + def __len__(self) -> int: + self.owner._check_open() + return len(self.cache) + + def __iter__(self) -> Iterator[str]: + self.owner._check_open() + return iter(self.cache) + + def __getitem__(self, key: str) -> Any: + self.owner._check_open() + return self.cache[key] + + def __setitem__(self, key: str, value: Any) -> None: + self.owner.writer # Ensures we're open in write mode. + state = self._current_state(key) + _check_metadata_type(key, value) + self.cache[key] = value + self._mods[key] = state.next_state("set") + + def __delitem__(self, key: str) -> None: + self.owner.writer # Ensures we're open in write mode. + state = self._current_state(key) + del self.cache[key] + self._mods[key] = state.next_state("del") + + def _current_state(self, key: str) -> _DictMod: + return self._mods.get(key, _DictMod.start_state(self.cache, key)) + + def _write(self) -> None: + """Writes out metadata changes, if there were any.""" + if not self._mods: + # There were no changes (e.g., it's a read handle). Do nothing. + return + # Only try to get the writer if there are changes to be made. + for key, mod in self._mods.items(): + if mod in (_DictMod.ADDED, _DictMod.UPDATED): + set_metadata = self.owner._handle.set_metadata + val = self.cache[key] + if isinstance(val, str): + set_metadata(key, np.array([val], "S")) + else: + set_metadata(key, np.array([val])) + if mod is _DictMod.DELETED: + self.owner._handle.delete_metadata(key) + + # Temporary hack: When we flush writes, note that the cache + # is back in sync with disk. + self._mods.clear() + + def __repr__(self) -> str: + prefix = f"{type(self).__name__}({self.owner})" + if self.owner.closed: + return f"<{prefix}>" + return f"<{prefix} {self.cache}>" + + +def _check_metadata_type(key: str, obj: Metadatum) -> None: + """Pre-checks that a metadata entry can be stored in an array. + + These checks are reproduced from the TileDB Python metadata-setting methods, + but are slightly more restrictive than what TileDB allows in general: + TileDB allows (some) arrays as metadata values, but the SOMA spec does not + allow arrays of any kind. + + We have to pre-check since we don't write metadata changes until closing. + """ + if not isinstance(key, str): + raise TypeError(f"metadata keys must be strings, not {type(key)}") + if isinstance(obj, METADATA_TYPES): + return + raise TypeError(f"cannot store {type(obj)} instance as metadata") diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index fc960ab213..bdb5e412f5 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -9,14 +9,10 @@ """ import abc -import enum from typing import ( Any, Dict, Generic, - Iterator, - Mapping, - MutableMapping, Optional, Tuple, Type, @@ -26,13 +22,13 @@ ) import attrs -import numpy as np from somacore import options -from typing_extensions import Literal, Self +from typing_extensions import Self from . import pytiledbsoma as clib from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error -from ._types import METADATA_TYPES, Metadatum, OpenTimestamp +from ._metadata_wrapper import MetadataWrapper +from ._types import OpenTimestamp from .options._soma_tiledb_context import SOMATileDBContext CLibHandle = Union[ @@ -396,142 +392,3 @@ class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]): """Wrapper around a Pybind11 SparseNDArrayWrapper handle.""" _ARRAY_WRAPPED_TYPE = clib.SOMASparseNDArray - - -class _DictMod(enum.Enum): - """State machine to keep track of modifications to a dictionary. - - This whole thing is a hack to allow users to treat the metadata dict - like an actual dictionary because tiledb currently does not support multiple - modifications to the same key (e.g., add-then-delete a metadata entry has - undesired results) [sc-25089]. - """ - - # Initially-absent keys are either added or not (added then removed). - ABSENT = enum.auto() - """The key is not present in the dict. Initial state.""" - ADDED = enum.auto() - """The key was originally ABSENT but has been added.""" - - # Initially-present keys can be either updated or deleted. - PRESENT = enum.auto() - """The key is in the dict and is unchanged. Initial state.""" - UPDATED = enum.auto() - """The key was originally PRESENT but has been changed.""" - DELETED = enum.auto() - """The key was originally PRESENT but has been deleted.""" - - @classmethod - def start_state(cls, dct: Mapping[Any, Any], key: Any) -> "_DictMod": - """Returns the starting state for a DictMod given the key of dct.""" - return cls.PRESENT if key in dct else cls.ABSENT - - def next_state(self, action: Literal["set", "del"]) -> "_DictMod": - """Determines the next state of an entry given the action.""" - return { - _DictMod.ABSENT: { - "set": _DictMod.ADDED, - }, - _DictMod.ADDED: { - "set": _DictMod.ADDED, - "del": _DictMod.ABSENT, - }, - _DictMod.PRESENT: { - "set": _DictMod.UPDATED, - "del": _DictMod.DELETED, - }, - _DictMod.UPDATED: { - "set": _DictMod.UPDATED, - "del": _DictMod.DELETED, - }, - _DictMod.DELETED: { - "set": _DictMod.UPDATED, - }, - }[self][action] - - -@attrs.define(frozen=True) -class MetadataWrapper(MutableMapping[str, Any]): - """A wrapper storing the metadata of some TileDB object. - - Because the view of metadata does not change after open time, we immediately - cache all of it and use that to handle all reads. Writes are then proxied - through to the backing store and the cache is updated to match. - """ - - owner: Wrapper[CLibHandle] - cache: Dict[str, Any] - _mods: Dict[str, "_DictMod"] = attrs.field(init=False, factory=dict) - """Tracks the modifications we have made to cache entries.""" - - def __len__(self) -> int: - self.owner._check_open() - return len(self.cache) - - def __iter__(self) -> Iterator[str]: - self.owner._check_open() - return iter(self.cache) - - def __getitem__(self, key: str) -> Any: - self.owner._check_open() - return self.cache[key] - - def __setitem__(self, key: str, value: Any) -> None: - self.owner.writer # Ensures we're open in write mode. - state = self._current_state(key) - _check_metadata_type(key, value) - self.cache[key] = value - self._mods[key] = state.next_state("set") - - def __delitem__(self, key: str) -> None: - self.owner.writer # Ensures we're open in write mode. - state = self._current_state(key) - del self.cache[key] - self._mods[key] = state.next_state("del") - - def _current_state(self, key: str) -> _DictMod: - return self._mods.get(key, _DictMod.start_state(self.cache, key)) - - def _write(self) -> None: - """Writes out metadata changes, if there were any.""" - if not self._mods: - # There were no changes (e.g., it's a read handle). Do nothing. - return - # Only try to get the writer if there are changes to be made. - for key, mod in self._mods.items(): - if mod in (_DictMod.ADDED, _DictMod.UPDATED): - set_metadata = self.owner._handle.set_metadata - val = self.cache[key] - if isinstance(val, str): - set_metadata(key, np.array([val], "S")) - else: - set_metadata(key, np.array([val])) - if mod is _DictMod.DELETED: - self.owner._handle.delete_metadata(key) - - # Temporary hack: When we flush writes, note that the cache - # is back in sync with disk. - self._mods.clear() - - def __repr__(self) -> str: - prefix = f"{type(self).__name__}({self.owner})" - if self.owner.closed: - return f"<{prefix}>" - return f"<{prefix} {self.cache}>" - - -def _check_metadata_type(key: str, obj: Metadatum) -> None: - """Pre-checks that a metadata entry can be stored in an array. - - These checks are reproduced from the TileDB Python metadata-setting methods, - but are slightly more restrictive than what TileDB allows in general: - TileDB allows (some) arrays as metadata values, but the SOMA spec does not - allow arrays of any kind. - - We have to pre-check since we don't write metadata changes until closing. - """ - if not isinstance(key, str): - raise TypeError(f"metadata keys must be strings, not {type(key)}") - if isinstance(obj, METADATA_TYPES): - return - raise TypeError(f"cannot store {type(obj)} instance as metadata") From 05a40aa9a8ad24182bd4f39a4ce07b596cdd872b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 18:54:00 -0400 Subject: [PATCH 23/24] run pre-commit to neaten imports --- apis/python/src/tiledbsoma/_metadata_wrapper.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/apis/python/src/tiledbsoma/_metadata_wrapper.py b/apis/python/src/tiledbsoma/_metadata_wrapper.py index e212aa5734..040559d05a 100644 --- a/apis/python/src/tiledbsoma/_metadata_wrapper.py +++ b/apis/python/src/tiledbsoma/_metadata_wrapper.py @@ -8,32 +8,21 @@ ``open``, ``ArrayWrapper.open``, ``GroupWrapper.open`` are the important parts. """ -import abc import enum from typing import ( Any, Dict, - Generic, Iterator, Mapping, MutableMapping, - Optional, - Tuple, - Type, - TypeVar, - Union, - cast, ) import attrs import numpy as np -from somacore import options -from typing_extensions import Literal, Self +from typing_extensions import Literal + +from ._types import METADATA_TYPES, Metadatum -from . import pytiledbsoma as clib -from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error -from ._types import METADATA_TYPES, Metadatum, OpenTimestamp -from .options._soma_tiledb_context import SOMATileDBContext class _DictMod(enum.Enum): """State machine to keep track of modifications to a dictionary. From 7b3763ae62b3feda4581fdbb10508d8845173b04 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Oct 2024 19:00:21 -0400 Subject: [PATCH 24/24] remove opener callback in _factory.py --- apis/python/src/tiledbsoma/_factory.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/apis/python/src/tiledbsoma/_factory.py b/apis/python/src/tiledbsoma/_factory.py index bf5ec7d2f5..7170730587 100644 --- a/apis/python/src/tiledbsoma/_factory.py +++ b/apis/python/src/tiledbsoma/_factory.py @@ -8,7 +8,6 @@ """ from typing import ( - Callable, Dict, Optional, Type, @@ -123,7 +122,7 @@ def open( """ context = _validate_soma_tiledb_context(context) obj: SOMAObject[_Wrapper] = _open_internal( # type: ignore[valid-type] - _tdb_handles.open, uri, mode, context, tiledb_timestamp + uri, mode, context, tiledb_timestamp ) try: if soma_type: @@ -144,16 +143,14 @@ def open( def _open_internal( - opener: Callable[ - [str, options.OpenMode, SOMATileDBContext, Optional[OpenTimestamp]], _Wrapper - ], uri: str, mode: options.OpenMode, context: SOMATileDBContext, timestamp: Optional[OpenTimestamp], ) -> SOMAObject[_Wrapper]: """Lower-level open function for internal use only.""" - handle = opener(uri, mode, context, timestamp) + # XXX temp cast + handle = cast(_Wrapper, _tdb_handles.open(uri, mode, context, timestamp)) try: return reify_handle(handle) except Exception: