diff --git a/.github/workflows/python-ci-packaging.yml b/.github/workflows/python-ci-packaging.yml index 3bb9da2e3b..4f81b35680 100644 --- a/.github/workflows/python-ci-packaging.yml +++ b/.github/workflows/python-ci-packaging.yml @@ -65,7 +65,7 @@ jobs: apt-get install --yes cmake git python-is-python3 python3 python3-pip python3-venv unzip wget - uses: actions/checkout@v4 with: - fetch-depth: 0 # for setuptools-scm + fetch-depth: 0 # for setuptools-scm - name: Configure Git run: | # This is a permissions quirk due to running Git as root inside of a Docker container @@ -76,7 +76,7 @@ jobs: run: | mkdir -p external # Please do not edit manually -- let scripts/update-tiledb-version.py update this - wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-linux-x86_64-2.26.1-db1cee4.tar.gz + wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-linux-x86_64-2.26.2-30fc114.tar.gz tar -C external -xzf tiledb-linux-x86_64-*.tar.gz ls external/lib/ echo "LD_LIBRARY_PATH=$(pwd)/external/lib" >> $GITHUB_ENV @@ -101,7 +101,7 @@ jobs: run: | python --version python -m venv ./venv-soma - ./venv-soma/bin/pip install --prefer-binary pybind11-global typeguard sparse wheel + ./venv-soma/bin/pip install --prefer-binary pybind11-global typeguard sparse 'setuptools>=70.1' wheel ./venv-soma/bin/pip list - name: Build wheel run: | @@ -164,7 +164,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 # for setuptools-scm + fetch-depth: 0 # for setuptools-scm - name: Check if System Integrity Protection (SIP) is enabled run: csrutil status - name: Install pre-built libtiledb @@ -172,7 +172,7 @@ jobs: run: | mkdir -p external # Please do not edit manually -- let scripts/update-tiledb-version.py update this - wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-macos-x86_64-2.26.1-db1cee4.tar.gz + wget --quiet 
https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-macos-x86_64-2.26.2-30fc114.tar.gz tar -C external -xzf tiledb-macos-x86_64-*.tar.gz ls external/lib/ echo "DYLD_LIBRARY_PATH=$(pwd)/external/lib" >> $GITHUB_ENV @@ -201,7 +201,7 @@ jobs: run: | python --version python -m venv ./venv-soma - ./venv-soma/bin/pip install --prefer-binary pybind11-global typeguard sparse wheel setuptools + ./venv-soma/bin/pip install --prefer-binary pybind11-global typeguard sparse wheel 'setuptools>=70.1' ./venv-soma/bin/pip list - name: Build wheel run: | @@ -257,17 +257,17 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 # for setuptools-scm + fetch-depth: 0 # for setuptools-scm - name: Install pre-built libtiledb run: | mkdir -p external if [ `uname -s` == "Darwin" ]; then # Please do not edit manually -- let scripts/update-tiledb-version.py update this - wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-macos-x86_64-2.26.1-db1cee4.tar.gz + wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-macos-x86_64-2.26.2-30fc114.tar.gz else # Please do not edit manually -- let scripts/update-tiledb-version.py update this - wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-linux-x86_64-2.26.1-db1cee4.tar.gz + wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-linux-x86_64-2.26.2-30fc114.tar.gz fi tar -C external -xzf tiledb-*.tar.gz ls external/lib/ @@ -302,7 +302,7 @@ jobs: run: | python --version python -m venv ./venv-soma - ./venv-soma/bin/pip install --prefer-binary pybind11-global typeguard sparse wheel setuptools + ./venv-soma/bin/pip install --prefer-binary pybind11-global typeguard sparse wheel 'setuptools>=70.1' ./venv-soma/bin/pip list - name: Install TileDB-SOMA-Py with setuptools and --libtiledbsoma run: | @@ -359,13 +359,13 @@ jobs: - uses: actions/checkout@v4 with: path: TileDB-SOMA - fetch-depth: 0 # for setuptools-scm + 
fetch-depth: 0 # for setuptools-scm - name: Setup Python uses: actions/setup-python@v5 with: python-version: "3.11" - name: Install dependencies - run: pip install --prefer-binary pybind11 wheel + run: pip install --prefer-binary pybind11 'setuptools>=70.1' wheel - name: Build source tarball (sdist) run: | cd TileDB-SOMA/apis/python diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b6238b146..a4f2c41456 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # Pandas 2.x types (e.g. `pd.Series[Any]`). See `_types.py` or https://github.com/single-cell-data/TileDB-SOMA/issues/2839 # for more info. - "pandas-stubs>=2" - - "somacore==1.0.18" + - "somacore==1.0.19" - types-setuptools args: ["--config-file=apis/python/pyproject.toml", "apis/python/src", "apis/python/devtools"] pass_filenames: false diff --git a/apis/python/notebooks/tutorial_spatial.ipynb b/apis/python/notebooks/tutorial_spatial.ipynb index 0802d75f05..175b1d8c36 100644 --- a/apis/python/notebooks/tutorial_spatial.ipynb +++ b/apis/python/notebooks/tutorial_spatial.ipynb @@ -197,7 +197,7 @@ "\n", "/home/julia/Software/TileDB-Inc/TileDB-SOMA/apis/python/src/tiledbsoma/_multiscale_image.py:151: UserWarning: Support for spatial types is experimental. Changes to both the API and data storage may not be backwards compatible.\n", " warnings.warn(SPATIAL_DISCLAIMER)\n", - "/home/julia/Software/TileDB-Inc/TileDB-SOMA/apis/python/src/tiledbsoma/_point_cloud.py:116: UserWarning: Support for spatial types is experimental. Changes to both the API and data storage may not be backwards compatible.\n", + "/home/julia/Software/TileDB-Inc/TileDB-SOMA/apis/python/src/tiledbsoma/_point_cloud_dataframe.py:116: UserWarning: Support for spatial types is experimental. 
Changes to both the API and data storage may not be backwards compatible.\n", " warnings.warn(SPATIAL_DISCLAIMER)\n" ] } @@ -591,7 +591,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 20, @@ -1065,7 +1065,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 28, @@ -1086,7 +1086,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 29, @@ -1100,23 +1100,23 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "id": "8fa549eb-598d-4a0c-9ab9-dd1bfac4199b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 30, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "scene.get_transform_to_point_cloud(\"loc\")" + "scene.get_transform_to_point_cloud_dataframe(\"loc\")" ] }, { @@ -1133,7 +1133,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "id": "18751a3c-0e29-4e00-ba58-3529e7164f6c", "metadata": {}, "outputs": [], @@ -1145,7 +1145,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "id": "02d18d4b-0caf-4fd5-ab6f-a0c8b2122069", "metadata": {}, "outputs": [ @@ -1155,10 +1155,10 @@ "SpatialRead(data=\n", "type: uint8\n", "shape: (753, 853, 3)\n", - "strides: (2559, 3, 1), data_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), output_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), coordinate_transform=)" + "strides: (2559, 3, 1), data_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), output_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), coordinate_transform=)" ] }, - "execution_count": 32, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1178,17 +1178,17 @@ }, { "cell_type": "code", - "execution_count": 33, + 
"execution_count": 34, "id": "e136c1b9-3b7f-44ed-aa20-e2abdb1868e1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "SpatialRead(data=, data_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), output_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), coordinate_transform=)" + "SpatialRead(data=, data_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), output_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), coordinate_transform=)" ] }, - "execution_count": 33, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1200,7 +1200,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "id": "53554033-a469-4126-88a3-bd9cec9fd78f", "metadata": {}, "outputs": [ @@ -1380,7 +1380,7 @@ "[928 rows x 7 columns]" ] }, - "execution_count": 34, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1392,7 +1392,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "id": "34a51b21-2d77-47c2-a3e1-bf159cd1b001", "metadata": {}, "outputs": [ @@ -1404,7 +1404,7 @@ " [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])" ] }, - "execution_count": 35, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1416,7 +1416,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "id": "c7c22f26-613c-44d9-9ce9-7abc64b377f9", "metadata": {}, "outputs": [], @@ -1429,7 +1429,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "id": "021cf9b5-9880-4857-8164-326dc26d8b6d", "metadata": {}, "outputs": [], @@ -1449,7 +1449,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "id": "1a67bc46-f768-4a16-8324-828c5a38bd57", "metadata": {}, "outputs": [ diff --git 
a/apis/python/pyproject.toml b/apis/python/pyproject.toml index de08970241..f5210cec38 100644 --- a/apis/python/pyproject.toml +++ b/apis/python/pyproject.toml @@ -1,8 +1,7 @@ [build-system] requires = [ "pybind11[global]>=2.10.0", - "setuptools>=65.5.1", - "wheel>=0.37.1", + "setuptools>=70.1", # `setuptools.command.bdist_wheel` "cmake>=3.21", ] build-backend = "setuptools.build_meta" diff --git a/apis/python/setup.py b/apis/python/setup.py index a68f47d7e4..100bdc6fad 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -21,8 +21,8 @@ import sys from typing import Optional +import setuptools.command.bdist_wheel import setuptools.command.build_ext -import wheel.bdist_wheel try: from pybind11.setup_helpers import Pybind11Extension @@ -219,7 +219,7 @@ def run(self): super().run() -class bdist_wheel(wheel.bdist_wheel.bdist_wheel): +class bdist_wheel(setuptools.command.bdist_wheel.bdist_wheel): def run(self): find_or_build_package_data(self) super().run() @@ -307,9 +307,9 @@ def run(self): "src/tiledbsoma/soma_array.cc", "src/tiledbsoma/soma_object.cc", "src/tiledbsoma/soma_dataframe.cc", + "src/tiledbsoma/soma_point_cloud_dataframe.cc", "src/tiledbsoma/soma_dense_ndarray.cc", "src/tiledbsoma/soma_sparse_ndarray.cc", - "src/tiledbsoma/soma_point_cloud.cc", "src/tiledbsoma/soma_group.cc", "src/tiledbsoma/soma_collection.cc", "src/tiledbsoma/pytiledbsoma.cc", @@ -336,7 +336,7 @@ def run(self): "scanpy>=1.9.2", "scipy", # Note: the somacore version is in .pre-commit-config.yaml too - "somacore==1.0.18", + "somacore==1.0.19", "tiledb~=0.32.0", "typing-extensions", # Note "-" even though `import typing_extensions` ], diff --git a/apis/python/src/tiledbsoma/__init__.py b/apis/python/src/tiledbsoma/__init__.py index 082b9c9ac2..1d6f231bc8 100644 --- a/apis/python/src/tiledbsoma/__init__.py +++ b/apis/python/src/tiledbsoma/__init__.py @@ -187,11 +187,9 @@ def _new_shape_feature_flag_enabled() -> bool: from ._indexer import IntIndexer, tiledbsoma_build_index from 
._measurement import Measurement from ._multiscale_image import MultiscaleImage -from ._point_cloud import PointCloud +from ._point_cloud_dataframe import PointCloudDataFrame from ._geometry_dataframe import GeometryDataFrame from ._sparse_nd_array import SparseNDArray, SparseNDArrayRead -from ._point_cloud import PointCloud -from ._geometry_dataframe import GeometryDataFrame from ._scene import Scene from .options import SOMATileDBContext, TileDBCreateOptions, TileDBWriteOptions from .pytiledbsoma import ( @@ -229,7 +227,7 @@ def _new_shape_feature_flag_enabled() -> bool: "MultiscaleImage", "NotCreateableError", "open", - "PointCloud", + "PointCloudDataFrame", "ResultOrder", "Scene", "show_package_versions", diff --git a/apis/python/src/tiledbsoma/_constants.py b/apis/python/src/tiledbsoma/_constants.py index 59438e870c..3e139ea728 100644 --- a/apis/python/src/tiledbsoma/_constants.py +++ b/apis/python/src/tiledbsoma/_constants.py @@ -8,15 +8,12 @@ SOMA_JOINID = "soma_joinid" SOMA_GEOMETRY = "soma_geometry" +SOMA_COORDINATE_SPACE_METADATA_KEY = "soma_coordinate_space" +SOMA_MULTISCALE_IMAGE_SCHEMA = "soma_multiscale_image_schema" SOMA_OBJECT_TYPE_METADATA_KEY = "soma_object_type" SOMA_ENCODING_VERSION_METADATA_KEY = "soma_encoding_version" -SOMA_ENCODING_VERSION = "1" - +SOMA_ENCODING_VERSION = "1.1.0" -# Spatial specific constants -SOMA_GEOMETRY = "soma_geometry" -SOMA_COORDINATE_SPACE_METADATA_KEY = "soma_coordinate_space" -SOMA_MULTISCALE_IMAGE_SCHEMA = "soma_multiscale_image_schema" SPATIAL_DISCLAIMER = ( "Support for spatial types is experimental. Changes to both the API and data " "storage may not be backwards compatible." 
diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index b3a1e74ca5..cfd9f26a4e 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -417,6 +417,18 @@ def tiledbsoma_has_upgraded_domain(self) -> bool: """ return self._handle.tiledbsoma_has_upgraded_domain + def resize_soma_joinid(self, newshape: int) -> None: + """Increases the shape of the dataframe on the ``soma_joinid`` index + column, if it indeed is an index column, leaving all other index columns + as-is. If the ``soma_joinid`` is not an index column, no change is made. + This is a special case of ``upgrade_domain`` (WIP for 1.15), but simpler + to keystroke, and handles the most common case for dataframe domain + expansion. Raises an error if the dataframe doesn't already have a + domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for + 1.15). + """ + self._handle._handle.resize_soma_joinid(newshape) + def __len__(self) -> int: """Returns the number of rows in the dataframe. Same as ``df.count``.""" return self.count @@ -830,9 +842,8 @@ def _fill_out_slot_soma_domain( # Here the slot_domain isn't specified by the user; we're setting it. # The SOMA spec disallows negative soma_joinid. 
if index_column_name == SOMA_JOINID: - slot_domain = (0, 2**31 - 2) # R-friendly, which 2**63-1 is not - else: - saturated_range = True + slot_domain = (0, 2**63 - 2) + saturated_range = True elif np.issubdtype(dtype, NPFloating): finfo = np.finfo(cast(NPFloating, dtype)) slot_domain = finfo.min, finfo.max diff --git a/apis/python/src/tiledbsoma/_factory.py b/apis/python/src/tiledbsoma/_factory.py index 41ae9ef3f1..bd64d07f05 100644 --- a/apis/python/src/tiledbsoma/_factory.py +++ b/apis/python/src/tiledbsoma/_factory.py @@ -29,14 +29,13 @@ _experiment, _measurement, _multiscale_image, - _point_cloud, + _point_cloud_dataframe, _scene, _soma_object, _sparse_nd_array, _tdb_handles, ) from ._constants import ( - SOMA_ENCODING_VERSION, SOMA_ENCODING_VERSION_METADATA_KEY, SOMA_OBJECT_TYPE_METADATA_KEY, ) @@ -203,7 +202,7 @@ def _read_soma_type(hdl: _tdb_handles.AnyWrapper) -> str: if isinstance(encoding_version, bytes): encoding_version = str(encoding_version, "utf-8") - if encoding_version != SOMA_ENCODING_VERSION: + if encoding_version not in {"1", "1.1.0"}: raise ValueError(f"Unsupported SOMA object encoding version {encoding_version}") return obj_type @@ -222,7 +221,7 @@ def _type_name_to_cls(type_name: str) -> Type[AnySOMAObject]: _multiscale_image.MultiscaleImage, _sparse_nd_array.SparseNDArray, _scene.Scene, - _point_cloud.PointCloud, + _point_cloud_dataframe.PointCloudDataFrame, ) } try: diff --git a/apis/python/src/tiledbsoma/_point_cloud.py b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py similarity index 95% rename from apis/python/src/tiledbsoma/_point_cloud.py rename to apis/python/src/tiledbsoma/_point_cloud_dataframe.py index 72d59de0c4..d17cee7239 100644 --- a/apis/python/src/tiledbsoma/_point_cloud.py +++ b/apis/python/src/tiledbsoma/_point_cloud_dataframe.py @@ -3,7 +3,7 @@ # # Licensed under the MIT License. 
""" -Implementation of a SOMA Point Cloud +Implementation of a SOMA Point Cloud DataFrame """ import warnings @@ -38,7 +38,7 @@ coordinate_space_to_json, process_spatial_df_region, ) -from ._tdb_handles import PointCloudWrapper +from ._tdb_handles import PointCloudDataFrameWrapper from ._types import OpenTimestamp from .options import SOMATileDBContext from .options._soma_tiledb_context import _validate_soma_tiledb_context @@ -50,11 +50,11 @@ _UNBATCHED = options.BatchSize() -class PointCloud(SpatialDataFrame, somacore.PointCloud): +class PointCloudDataFrame(SpatialDataFrame, somacore.PointCloudDataFrame): """A specialized SOMA DataFrame for storing collections of points in multi-dimensional space. - The ``PointCloud`` class is designed to efficiently store and query point data, + The ``PointCloudDataFrame`` class is designed to efficiently store and query point data, where each point is represented by coordinates in one or more spatial dimensions (e.g., x, y, z) and may have additional columns for associated attributes. @@ -63,7 +63,7 @@ class PointCloud(SpatialDataFrame, somacore.PointCloud): """ __slots__ = ("_coord_space",) - _wrapper_type = PointCloudWrapper + _wrapper_type = PointCloudDataFrameWrapper @classmethod def create( @@ -78,7 +78,7 @@ def create( context: Optional[SOMATileDBContext] = None, tiledb_timestamp: Optional[OpenTimestamp] = None, ) -> Self: - """Creates a new ``PointCloud`` at the given URI. + """Creates a new ``PointCloudDataFrame`` at the given URI. 
The schema of the created point cloud dataframe will include a column named ``soma_joinid`` of type ``pyarrow.int64``, with negative values disallowed, and @@ -234,7 +234,7 @@ def create( plt_cfg = _util.build_clib_platform_config(platform_config) timestamp_ms = context._open_timestamp_ms(tiledb_timestamp) try: - clib.SOMAPointCloud.create( + clib.SOMAPointCloudDataFrame.create( uri, schema=schema, index_column_info=index_column_info, @@ -256,7 +256,7 @@ def create( def __init__( self, - handle: PointCloudWrapper, + handle: PointCloudDataFrameWrapper, **kwargs: Any, ): super().__init__(handle, **kwargs) @@ -270,8 +270,8 @@ def __init__( for name in self._coord_space.axis_names: if name not in self.index_column_names: raise SOMAError( - f"Point cloud axis '{name}' does not match any of the index column" - f" names." + f"Point cloud dataframe axis '{name}' does not match any of the " + f"index column names." ) # Data operations @@ -284,8 +284,8 @@ def __len__(self) -> int: def count(self) -> int: """Returns the number of rows in the dataframe.""" self._check_open_read() - # if is it in read open mode, then it is a PointCloudWrapper - return cast(PointCloudWrapper, self._handle).count + # if it is in read open mode, then it is a PointCloudDataFrameWrapper + return cast(PointCloudDataFrameWrapper, self._handle).count def read( self, @@ -331,7 +331,7 @@ def read( config.update(platform_config) context = clib.SOMAContext(config) - sr = clib.SOMAPointCloud.open( + sr = clib.SOMAPointCloudDataFrame.open( uri=handle.uri, mode=clib.OpenMode.read, context=context, @@ -420,7 +420,7 @@ def read_spatial_region( raise ValueError( f"The output axes of '{region_transform.output_axes}' of the " f"transform must match the axes '{self._coord_space.axis_names}' " - f"of the coordinate space of this point cloud." + f"of the coordinate space of this point cloud dataframe." ) # Process the user provided region. 
@@ -494,7 +494,7 @@ def write( @property def coordinate_space(self) -> CoordinateSpace: - """Coordinate space for this point cloud. + """Coordinate space for this point cloud dataframe. Lifecycle: Experimental. @@ -503,7 +503,7 @@ def coordinate_space(self) -> CoordinateSpace: @coordinate_space.setter def coordinate_space(self, value: CoordinateSpace) -> None: - """Coordinate space for this point cloud. + """Coordinate space for this point cloud dataframe. Lifecycle: Experimental. diff --git a/apis/python/src/tiledbsoma/_scene.py b/apis/python/src/tiledbsoma/_scene.py index 5199b1ae25..e02a6d69a0 100644 --- a/apis/python/src/tiledbsoma/_scene.py +++ b/apis/python/src/tiledbsoma/_scene.py @@ -17,7 +17,7 @@ from ._exception import SOMAError from ._geometry_dataframe import GeometryDataFrame from ._multiscale_image import MultiscaleImage -from ._point_cloud import PointCloud +from ._point_cloud_dataframe import PointCloudDataFrame from ._soma_object import AnySOMAObject from ._spatial_util import ( coordinate_space_from_json, @@ -29,7 +29,9 @@ class Scene( # type: ignore[misc] # __eq__ false positive CollectionBase[AnySOMAObject], - somacore.Scene[MultiscaleImage, PointCloud, GeometryDataFrame, AnySOMAObject], + somacore.Scene[ + MultiscaleImage, PointCloudDataFrame, GeometryDataFrame, AnySOMAObject + ], ): """A collection subtype representing spatial assets that can all be stored on a single coordinate space. @@ -38,7 +40,7 @@ class Scene( # type: ignore[misc] # __eq__ false positive Experimental. """ - __slots__ = "_coord_space" + __slots__ = ("_coord_space",) _wrapper_type = _tdb_handles.SceneWrapper _subclass_constrained_soma_types = { @@ -129,11 +131,11 @@ def add_multiscale_image( """Adds a ``MultiscaleImage`` to the scene and sets a coordinate transform between the scene and the dataframe. - Parameters are as in :meth:`spatial.PointCloud.create`. + Parameters are as in :meth:`spatial.MultiscaleImage.create`. 
See :meth:`add_new_collection` for details about child URIs. Args: - key: The name of the geometry dataframe. + key: The name of the multiscale image. transform: The coordinate transformation from the scene to the dataframe. subcollection: The name, or sequence of names, of the subcollection the dataframe is stored in. Defaults to ``'obsl'``. @@ -146,9 +148,9 @@ def add_multiscale_image( raise NotImplementedError() @_funcs.forwards_kwargs_to( - PointCloud.create, exclude=("context", "tiledb_timestamp") + PointCloudDataFrame.create, exclude=("context", "tiledb_timestamp") ) - def add_new_point_cloud( + def add_new_point_cloud_dataframe( self, key: str, subcollection: Union[str, Sequence[str]], @@ -156,11 +158,11 @@ def add_new_point_cloud( *, uri: Optional[str] = None, **kwargs: Any, - ) -> PointCloud: - """Adds a point cloud to the scene and sets a coordinate transform - between the scene and the dataframe. + ) -> PointCloudDataFrame: + """Adds a point cloud dataframe to the scene and sets a coordinate + transform between the scene and the dataframe. - Parameters are as in :meth:`spatial.PointCloud.create`. + Parameters are as in :meth:`spatial.PointCloudDataFrame.create`. See :meth:`add_new_collection` for details about child URIs. Args: @@ -170,7 +172,7 @@ def add_new_point_cloud( dataframe is stored in. Defaults to ``'obsl'``. Returns: - The newly created ``PointCloud``, opened for writing. + The newly created ``PointCloudDataFrame``, opened for writing. 
Lifecycle: experimental """ @@ -291,37 +293,37 @@ def set_transform_to_multiscale_image( coll.metadata[f"soma_scene_registry_{key}"] = transform_to_json(transform) return image - def set_transform_to_point_cloud( + def set_transform_to_point_cloud_dataframe( self, key: str, transform: CoordinateTransform, *, subcollection: Union[str, Sequence[str]] = "obsl", coordinate_space: Optional[CoordinateSpace] = None, - ) -> PointCloud: + ) -> PointCloudDataFrame: """Adds the coordinate transform for the scene coordinate space to - a point cloud stored in the scene. + a point cloud dataframe stored in the scene. - If the subcollection the point cloud is inside of is more than one + If the subcollection the point cloud dataframe is inside of is more than one layer deep, the input should be provided as a sequence of names. For example, to set a transform for a point named `transcripts` in the `var/RNA` collection:: - scene.set_transformation_for_point_cloud( + scene.set_transform_to_point_cloud_dataframe( 'transcripts', transform, subcollection=['var', 'RNA'], ) Args: - key: The name of the point cloud. - transform: The coordinate transformation from the scene to the point cloud. + key: The name of the point cloud dataframe. + transform: The coordinate transformation from the scene to the dataframe. subcollection: The name, or sequence of names, of the subcollection the - point cloud is stored in. Defaults to ``'obsl'``. - coordinate_space: Optional coordinate space for the point cloud. This will - replace the existing coordinate space of the point cloud. Defaults to + dataframe is stored in. Defaults to ``'obsl'``. + coordinate_space: Optional coordinate space for the dataframe. This will + replace the existing coordinate space of the dataframe. Defaults to ``None``. Returns: - The point cloud, opened for writing. + The point cloud dataframe, opened for writing. 
Lifecycle: experimental """ @@ -330,7 +332,7 @@ def set_transform_to_point_cloud( if self.coordinate_space is None: raise SOMAError( "The scene coordinate space must be set before registering a point " - "cloud." + "cloud dataframe." ) # Create the coordinate space if it does not exist. Otherwise, check it is # compatible with the provide transform. @@ -356,11 +358,13 @@ def set_transform_to_point_cloud( except KeyError as ke: raise KeyError(f"No collection '{subcollection}' in this scene.") from ke try: - point_cloud: PointCloud = coll[key] + point_cloud: PointCloudDataFrame = coll[key] except KeyError as ke: - raise KeyError(f"No PointCloud named '{key}' in '{coll}'.") from ke - if not isinstance(point_cloud, PointCloud): - raise TypeError(f"'{key}' in '{subcollection}' is not an PointCloud.") + raise KeyError(f"No PointCloudDataFrame named '{key}' in '{coll}'.") from ke + if not isinstance(point_cloud, PointCloudDataFrame): + raise TypeError( + f"'{key}' in '{subcollection}' is not an PointCloudDataFrame." + ) point_cloud.coordinate_space = coordinate_space coll.metadata[f"soma_scene_registry_{key}"] = transform_to_json(transform) @@ -409,19 +413,19 @@ def get_transform_from_multiscale_image( """ raise NotImplementedError() - def get_transform_from_point_cloud( + def get_transform_from_point_cloud_dataframe( self, key: str, *, subcollection: str = "obsl" ) -> CoordinateTransform: - """Returns the coordinate transformation from the requested point cloud to - the scene. + """Returns the coordinate transformation from the requested point cloud + dataframe to the scene. Args: - key: The name of the point cloud. + key: The name of the point cloud dataframe. subcollection: The name, or sequence of names, of the subcollection the - point cloud is stored in. Defaults to ``'obsl'``. + dataframe is stored in. Defaults to ``'obsl'``. Returns: - Coordinate transform from the scene to the point cloud. + Coordinate transform from the point cloud dataframe to the scene. 
Lifecycle: experimental """ @@ -510,19 +514,19 @@ def get_transform_to_multiscale_image( level_transform = image.get_transform_to_level(level) return level_transform @ base_transform - def get_transform_to_point_cloud( + def get_transform_to_point_cloud_dataframe( self, key: str, *, subcollection: str = "obsl" ) -> CoordinateTransform: """Returns the coordinate transformation from the scene to a requested - point cloud. + point cloud dataframe. Args: - key: The name of the point cloud. + key: The name of the point cloud dataframe. subcollection: The name, or sequence of names, of the subcollection the - point cloud is stored in. Defaults to ``'obsl'``. + dataframe is stored in. Defaults to ``'obsl'``. Returns: - Coordinate transform from the scene to the requested point cloud. + Coordinate transform from the scene to the point cloud dataframe. Lifecycle: experimental """ diff --git a/apis/python/src/tiledbsoma/_soma_object.py b/apis/python/src/tiledbsoma/_soma_object.py index f8406dc87d..f2da547633 100644 --- a/apis/python/src/tiledbsoma/_soma_object.py +++ b/apis/python/src/tiledbsoma/_soma_object.py @@ -41,7 +41,7 @@ class SOMAObject(somacore.SOMAObject, Generic[_WrapperType_co]): _wrapper_type: Union[ Type[_WrapperType_co], Type[_tdb_handles.DataFrameWrapper], - Type[_tdb_handles.PointCloudWrapper], + Type[_tdb_handles.PointCloudDataFrameWrapper], Type[_tdb_handles.DenseNDArrayWrapper], Type[_tdb_handles.SparseNDArrayWrapper], Type[_tdb_handles.CollectionWrapper], @@ -112,7 +112,7 @@ def __init__( handle: Union[ _WrapperType_co, _tdb_handles.DataFrameWrapper, - _tdb_handles.PointCloudWrapper, + _tdb_handles.PointCloudDataFrameWrapper, _tdb_handles.DenseNDArrayWrapper, _tdb_handles.SparseNDArrayWrapper, ], diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index 302f595563..c811d320aa 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -490,8 
+490,12 @@ def _dim_capacity_and_extent( int64 is returned for the capacity. """ if dim_shape is None: - dim_capacity = 2**31 - 2 # Make this friendly for reads by tiledbsoma-r + dim_capacity = 2**63 - 1 dim_extent = min(dim_capacity, create_options.dim_tile(dim_name, 2048)) + # For core: "domain max expanded to multiple of tile extent exceeds max value + # representable by domain type. Reduce domain max by 1 tile extent to allow for + # expansion." + dim_capacity -= dim_extent else: if dim_shape <= 0: raise ValueError( diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index f251441802..ee320f3559 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -41,7 +41,7 @@ RawHandle = Union[ clib.SOMAArray, clib.SOMADataFrame, - clib.SOMAPointCloud, + clib.SOMAPointCloudDataFrame, clib.SOMASparseNDArray, clib.SOMADenseNDArray, clib.SOMAGroup, @@ -80,7 +80,7 @@ def open( _type_to_class = { "somadataframe": DataFrameWrapper, - "somapointcloud": PointCloudWrapper, + "somapointclouddataframe": PointCloudDataFrameWrapper, "somadensendarray": DenseNDArrayWrapper, "somasparsendarray": SparseNDArrayWrapper, "somacollection": CollectionWrapper, @@ -95,6 +95,15 @@ def open( soma_object, context ) except KeyError: + if soma_object.type.lower() in { + "somascene", + "somapointclouddataframe", + "somageometrydataframe", + "somamultiscaleimage", + }: + raise NotImplementedError( + f"Support for {soma_object.type!r} is not yet implemented." 
+ ) raise SOMAError(f"{uri!r} has unknown storage type {soma_object.type!r}") @@ -459,6 +468,10 @@ def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None """Not implemented for DataFrame.""" raise NotImplementedError + def resize_soma_joinid(self, newshape: int) -> None: + """Only implemented for DataFrame.""" + raise NotImplementedError + class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]): """Wrapper around a Pybind11 SOMADataFrame handle.""" @@ -505,11 +518,23 @@ def tiledbsoma_has_upgraded_domain(self) -> bool: """ return cast(bool, self._handle.tiledbsoma_has_upgraded_domain) + def resize_soma_joinid(self, newshape: int) -> None: + """Increases the shape of the dataframe on the ``soma_joinid`` index + column, if it indeed is an index column, leaving all other index columns + as-is. If the ``soma_joinid`` is not an index column, no change is made. + This is a special case of ``upgrade_domain`` (WIP for 1.15), but simpler + to keystroke, and handles the most common case for dataframe domain + expansion. Raises an error if the dataframe doesn't already have a + domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for + 1.15). 
+ """ + self._handle.resize_soma_joinid(newshape) + -class PointCloudWrapper(SOMAArrayWrapper[clib.SOMAPointCloud]): - """Wrapper around a Pybind11 SOMAPointCloud handle.""" +class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]): + """Wrapper around a Pybind11 SOMAPointCloudDataFrame handle.""" - _ARRAY_WRAPPED_TYPE = clib.SOMAPointCloud + _ARRAY_WRAPPED_TYPE = clib.SOMAPointCloudDataFrame @property def count(self) -> int: @@ -520,7 +545,7 @@ def write(self, values: pa.RecordBatch) -> None: @property def shape(self) -> Tuple[int, ...]: - # Shape is not implemented for point clouds + # Shape is not implemented for point cloud dataframes raise NotImplementedError diff --git a/apis/python/src/tiledbsoma/experimental/ingest.py b/apis/python/src/tiledbsoma/experimental/ingest.py index 64c34cc9c5..92f020db4b 100644 --- a/apis/python/src/tiledbsoma/experimental/ingest.py +++ b/apis/python/src/tiledbsoma/experimental/ingest.py @@ -40,7 +40,7 @@ DenseNDArray, Experiment, MultiscaleImage, - PointCloud, + PointCloudDataFrame, Scene, SparseNDArray, _util, @@ -444,7 +444,7 @@ def _write_visium_data_to_experiment_uri( **ingest_ctx, ) as loc: _maybe_set(obsl, "loc", loc, use_relative_uri=use_relative_uri) - scene.set_transform_to_point_cloud( + scene.set_transform_to_point_cloud_dataframe( "loc", IdentityTransform(("x", "y"), ("x", "y")) ) @@ -558,7 +558,7 @@ def _write_visium_spots( additional_metadata: "AdditionalMetadata" = None, platform_config: Optional["PlatformConfig"] = None, context: Optional["SOMATileDBContext"] = None, -) -> PointCloud: +) -> PointCloudDataFrame: """TODO: Add _write_visium_spot_dataframe docs""" df = ( pd.read_csv(input_tissue_positions) @@ -576,7 +576,7 @@ def _write_visium_spots( arrow_table = df_to_arrow(df) - soma_point_cloud = PointCloud.create( + soma_point_cloud = PointCloudDataFrame.create( df_uri, schema=arrow_table.schema, platform_config=platform_config, diff --git a/apis/python/src/tiledbsoma/io/ingest.py 
b/apis/python/src/tiledbsoma/io/ingest.py index 613c889817..10893c3c0c 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -46,7 +46,7 @@ DenseNDArray, Experiment, Measurement, - PointCloud, + PointCloudDataFrame, Scene, SparseNDArray, _factory, @@ -1117,7 +1117,7 @@ def _extract_new_values_for_append( def _write_arrow_table( arrow_table: pa.Table, - handle: Union[DataFrame, SparseNDArray, PointCloud], + handle: Union[DataFrame, SparseNDArray, PointCloudDataFrame], tiledb_create_options: TileDBCreateOptions, tiledb_write_options: TileDBWriteOptions, ) -> None: diff --git a/apis/python/src/tiledbsoma/pytiledbsoma.cc b/apis/python/src/tiledbsoma/pytiledbsoma.cc index 87873b3db7..fa97811f04 100644 --- a/apis/python/src/tiledbsoma/pytiledbsoma.cc +++ b/apis/python/src/tiledbsoma/pytiledbsoma.cc @@ -20,7 +20,7 @@ void load_soma_context(py::module&); void load_soma_object(py::module&); void load_soma_array(py::module&); void load_soma_dataframe(py::module&); -void load_soma_point_cloud(py::module&); +void load_soma_point_cloud_dataframe(py::module&); void load_soma_dense_ndarray(py::module&); void load_soma_sparse_ndarray(py::module&); void load_soma_group(py::module&); @@ -210,7 +210,7 @@ PYBIND11_MODULE(pytiledbsoma, m) { load_soma_dataframe(m); load_soma_dense_ndarray(m); load_soma_sparse_ndarray(m); - load_soma_point_cloud(m); + load_soma_point_cloud_dataframe(m); load_soma_group(m); load_soma_collection(m); load_query_condition(m); diff --git a/apis/python/src/tiledbsoma/soma_dataframe.cc b/apis/python/src/tiledbsoma/soma_dataframe.cc index e7f7ad84d9..f4275fc77a 100644 --- a/apis/python/src/tiledbsoma/soma_dataframe.cc +++ b/apis/python/src/tiledbsoma/soma_dataframe.cc @@ -155,6 +155,17 @@ void load_soma_dataframe(py::module& m) { "maybe_soma_joinid_maxshape", &SOMADataFrame::maybe_soma_joinid_maxshape) .def_property_readonly( - "tiledbsoma_has_upgraded_domain", &SOMAArray::has_current_domain); + 
"tiledbsoma_has_upgraded_domain", &SOMAArray::has_current_domain) + + .def( + "resize_soma_joinid", + [](SOMADataFrame& sdf, int64_t newshape) { + try { + sdf.resize_soma_joinid(newshape); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, + "newshape"_a); } } // namespace libtiledbsomacpp diff --git a/apis/python/src/tiledbsoma/soma_object.cc b/apis/python/src/tiledbsoma/soma_object.cc index c0f5c9a258..1156f7148d 100644 --- a/apis/python/src/tiledbsoma/soma_object.cc +++ b/apis/python/src/tiledbsoma/soma_object.cc @@ -73,9 +73,9 @@ void load_soma_object(py::module& m) { if (soma_obj_type == "somadataframe") return py::cast( dynamic_cast(*soma_obj)); - if (soma_obj_type == "somapointcloud") + if (soma_obj_type == "somapointclouddataframe") return py::cast( - dynamic_cast(*soma_obj)); + dynamic_cast(*soma_obj)); else if (soma_obj_type == "somasparsendarray") return py::cast( dynamic_cast(*soma_obj)); diff --git a/apis/python/src/tiledbsoma/soma_point_cloud.cc b/apis/python/src/tiledbsoma/soma_point_cloud_dataframe.cc similarity index 90% rename from apis/python/src/tiledbsoma/soma_point_cloud.cc rename to apis/python/src/tiledbsoma/soma_point_cloud_dataframe.cc index dd1046bfd0..cbc16bb59e 100644 --- a/apis/python/src/tiledbsoma/soma_point_cloud.cc +++ b/apis/python/src/tiledbsoma/soma_point_cloud_dataframe.cc @@ -1,5 +1,5 @@ /** - * @file soma_point_cloud.cc + * @file soma_point_cloud_dataframe.cc * * @section LICENSE * @@ -27,7 +27,7 @@ * * @section DESCRIPTION * - * This file defines the SOMAPointCloud bindings. + * This file defines the SOMAPointCloudDataFrame bindings. 
*/ #include @@ -46,8 +46,9 @@ namespace py = pybind11; using namespace py::literals; using namespace tiledbsoma; -void load_soma_point_cloud(py::module& m) { - py::class_(m, "SOMAPointCloud") +void load_soma_point_cloud_dataframe(py::module& m) { + py::class_( + m, "SOMAPointCloudDataFrame") .def_static( "create", @@ -99,7 +100,7 @@ void load_soma_point_cloud(py::module& m) { index_column_array_ptr, index_column_schema_ptr); try { - SOMAPointCloud::create( + SOMAPointCloudDataFrame::create( uri, std::make_unique(schema), ArrowTable( @@ -132,7 +133,7 @@ void load_soma_point_cloud(py::module& m) { std::vector, ResultOrder, std::optional>>( - &SOMAPointCloud::open), + &SOMAPointCloudDataFrame::open), "uri"_a, "mode"_a, "context"_a, @@ -141,12 +142,12 @@ void load_soma_point_cloud(py::module& m) { "result_order"_a = ResultOrder::automatic, "timestamp"_a = py::none()) - .def_static("exists", &SOMAPointCloud::exists) + .def_static("exists", &SOMAPointCloudDataFrame::exists) .def_property_readonly( - "index_column_names", &SOMAPointCloud::index_column_names) + "index_column_names", &SOMAPointCloudDataFrame::index_column_names) .def_property_readonly( "count", - &SOMAPointCloud::count, + &SOMAPointCloudDataFrame::count, py::call_guard()); } } // namespace libtiledbsomacpp diff --git a/apis/python/tests/test_point_cloud.py b/apis/python/tests/test_point_cloud_dataframe.py similarity index 93% rename from apis/python/tests/test_point_cloud.py rename to apis/python/tests/test_point_cloud_dataframe.py index 0c082584db..95d50ed99d 100644 --- a/apis/python/tests/test_point_cloud.py +++ b/apis/python/tests/test_point_cloud_dataframe.py @@ -15,19 +15,21 @@ def test_point_cloud_bad_create(tmp_path): # axis names must be in index column names asch = pa.schema([("x", pa.float64()), ("y", pa.float64())]) with pytest.raises(ValueError): - soma.PointCloud.create( + soma.PointCloudDataFrame.create( urljoin(baseuri, "bad_name_subset"), schema=asch, index_column_names="x" ) # all spatial 
axis must have the same type asch = pa.schema([("x", pa.float64()), ("y", pa.int64())]) with pytest.raises(ValueError): - soma.PointCloud.create(urljoin(baseuri, "different_types"), schema=asch) + soma.PointCloudDataFrame.create( + urljoin(baseuri, "different_types"), schema=asch + ) # type must be integral or floating-point asch = pa.schema([("x", pa.large_string()), ("y", pa.large_string())]) with pytest.raises(ValueError): - soma.PointCloud.create(urljoin(baseuri, "bad_type"), schema=asch) + soma.PointCloudDataFrame.create(urljoin(baseuri, "bad_type"), schema=asch) def test_point_cloud_basic_read(tmp_path): @@ -36,7 +38,9 @@ def test_point_cloud_basic_read(tmp_path): asch = pa.schema([("x", pa.float64()), ("y", pa.float64())]) # defaults - with soma.PointCloud.create(urljoin(baseuri, "default"), schema=asch) as ptc: + with soma.PointCloudDataFrame.create( + urljoin(baseuri, "default"), schema=asch + ) as ptc: pydict = {} pydict["soma_joinid"] = [1, 2, 3, 4, 5] pydict["x"] = [10, 20, 30, 40, 50] @@ -45,7 +49,7 @@ def test_point_cloud_basic_read(tmp_path): rb = pa.Table.from_pydict(pydict) ptc.write(rb) - with soma.PointCloud.open(urljoin(baseuri, "default"), "r") as ptc: + with soma.PointCloudDataFrame.open(urljoin(baseuri, "default"), "r") as ptc: assert set(ptc.schema.names) == set(ptc.index_column_names) assert ptc.index_column_names == ("soma_joinid", "x", "y") assert ptc.axis_names == ("x", "y") @@ -58,7 +62,7 @@ def test_point_cloud_basic_read(tmp_path): assert [e.as_py() for e in table["y"]] == pydict["y"] # with user defined values - with soma.PointCloud.create( + with soma.PointCloudDataFrame.create( urljoin(baseuri, "user_defined"), schema=asch, index_column_names="x", @@ -73,7 +77,7 @@ def test_point_cloud_basic_read(tmp_path): rb = pa.Table.from_pydict(pydict) ptc.write(rb) - with soma.PointCloud.open(urljoin(baseuri, "user_defined"), "r") as ptc: + with soma.PointCloudDataFrame.open(urljoin(baseuri, "user_defined"), "r") as ptc: assert 
set(ptc.schema.names) == set(["soma_joinid", "x", "y"]) assert ptc.index_column_names == ("x",) assert ptc.axis_names == ("x",) @@ -92,7 +96,7 @@ def test_point_cloud_coordinate_space(tmp_path): asch = pa.schema([("x", pa.float64()), ("y", pa.float64())]) - with soma.PointCloud.create(uri, schema=asch) as ptc: + with soma.PointCloudDataFrame.create(uri, schema=asch) as ptc: assert len(ptc.coordinate_space) == 2 assert ptc.coordinate_space.axis_names == ("x", "y") assert ptc.coordinate_space.axes == (soma.Axis(name="x"), soma.Axis(name="y")) @@ -115,7 +119,7 @@ def test_point_cloud_bad_read_spatial_region(tmp_path): schema = pa.schema([("x", pa.float64()), ("y", pa.float64())]) - with soma.PointCloud.create(uri, schema=schema) as ptc: + with soma.PointCloudDataFrame.create(uri, schema=schema) as ptc: pydict = { "soma_joinid": [1, 2, 3, 4, 5], "x": [10, 20, 30, 40, 50], @@ -124,7 +128,7 @@ def test_point_cloud_bad_read_spatial_region(tmp_path): rb = pa.Table.from_pydict(pydict) ptc.write(rb) - with soma.PointCloud.open(uri, "r") as ptc: + with soma.PointCloudDataFrame.open(uri, "r") as ptc: # Cannot specify the output coordinate space when transform is None with pytest.raises(ValueError): ptc.read_spatial_region( @@ -269,7 +273,7 @@ def test_point_cloud_read_spatial_region_basic_2d( schema = pa.schema([("x", pa.float64()), ("y", pa.float64())]) - with soma.PointCloud.create(uri, schema=schema) as ptc: + with soma.PointCloudDataFrame.create(uri, schema=schema) as ptc: pydict = { "soma_joinid": [1, 2, 3, 4, 5], "x": [10, 20, 30, 40, 50], @@ -278,7 +282,7 @@ def test_point_cloud_read_spatial_region_basic_2d( rb = pa.Table.from_pydict(pydict) ptc.write(rb) - with soma.PointCloud.open(uri, "r") as ptc: + with soma.PointCloudDataFrame.open(uri, "r") as ptc: actual_output = ptc.read_spatial_region( region=region, value_filter=value_filter ) @@ -389,7 +393,7 @@ def test_point_cloud_read_spatial_region_basic_3d( schema = pa.schema([("x", pa.float64()), ("y", pa.float64()), 
("z", pa.float64())]) - with soma.PointCloud.create( + with soma.PointCloudDataFrame.create( uri, schema=schema, index_column_names=("soma_joinid", "x", "y", "z"), @@ -404,7 +408,7 @@ def test_point_cloud_read_spatial_region_basic_3d( rb = pa.Table.from_pydict(pydict) ptc.write(rb) - with soma.PointCloud.open(uri, "r") as ptc: + with soma.PointCloudDataFrame.open(uri, "r") as ptc: actual_output = ptc.read_spatial_region(region=region) assert actual_output.data.concat().to_pydict() == expected_output @@ -423,7 +427,7 @@ def test_point_cloud_read_spatial_region_2d_bad(tmp_path, name, region, exc_type schema = pa.schema([("x", pa.float64()), ("y", pa.float64())]) - with soma.PointCloud.create(uri, schema=schema) as ptc: + with soma.PointCloudDataFrame.create(uri, schema=schema) as ptc: pydict = { "soma_joinid": [1, 2, 3, 4, 5], "x": [10, 20, 30, 40, 50], @@ -432,7 +436,7 @@ def test_point_cloud_read_spatial_region_2d_bad(tmp_path, name, region, exc_type rb = pa.Table.from_pydict(pydict) ptc.write(rb) - with soma.PointCloud.open(uri, "r") as ptc: + with soma.PointCloudDataFrame.open(uri, "r") as ptc: with pytest.raises(exc_type): ptc.read_spatial_region(region=region) @@ -452,7 +456,7 @@ def test_point_cloud_read_spatial_region_3d_bad(tmp_path, name, region, exc_type schema = pa.schema([("x", pa.float64()), ("y", pa.float64()), ("z", pa.float64())]) - with soma.PointCloud.create(uri, schema=schema) as ptc: + with soma.PointCloudDataFrame.create(uri, schema=schema) as ptc: pydict = { "soma_joinid": [1, 2, 3, 4, 5], "x": [10, 20, 30, 40, 50], @@ -462,7 +466,7 @@ def test_point_cloud_read_spatial_region_3d_bad(tmp_path, name, region, exc_type rb = pa.Table.from_pydict(pydict) ptc.write(rb) - with soma.PointCloud.open(uri, "r") as ptc: + with soma.PointCloudDataFrame.open(uri, "r") as ptc: with pytest.raises(exc_type): ptc.read_spatial_region(region=region) @@ -470,7 +474,7 @@ def test_point_cloud_read_spatial_region_3d_bad(tmp_path, name, region, exc_type def 
point_cloud_read_spatial_region_transform_setup(uri, transform, input_axes, kwargs): schema = pa.schema([("x", pa.float64()), ("y", pa.float64())]) - with soma.PointCloud.create(uri, schema=schema) as ptc: + with soma.PointCloudDataFrame.create(uri, schema=schema) as ptc: pydict = { "soma_joinid": [1, 2, 3, 4, 5], "x": [10, 20, 30, 40, 50], @@ -482,7 +486,7 @@ def point_cloud_read_spatial_region_transform_setup(uri, transform, input_axes, output_names = ("x", "y") input_names = tuple(axis.name for axis in input_axes) - with soma.PointCloud.open(uri, "r") as ptc: + with soma.PointCloudDataFrame.open(uri, "r") as ptc: read_spatial_region = ptc.read_spatial_region( region_transform=transform( input_axes=input_names, output_axes=output_names, **kwargs @@ -671,7 +675,7 @@ def test_point_cloud_read_spatial_region_region_coord_space(tmp_path): schema = pa.schema([("x", pa.float64()), ("y", pa.float64())]) - with soma.PointCloud.create(uri, schema=schema) as ptc: + with soma.PointCloudDataFrame.create(uri, schema=schema) as ptc: pydict = { "soma_joinid": [1, 2, 3, 4, 5], "x": [10, 20, 30, 40, 50], @@ -680,7 +684,7 @@ def test_point_cloud_read_spatial_region_region_coord_space(tmp_path): rb = pa.Table.from_pydict(pydict) ptc.write(rb) - with soma.PointCloud.open(uri, "r") as ptc: + with soma.PointCloudDataFrame.open(uri, "r") as ptc: output = ptc.read_spatial_region() assert output.output_coordinate_space.axis_names == ("x", "y") diff --git a/apis/python/tests/test_shape.py b/apis/python/tests/test_shape.py index cedcb9c37c..e02c543650 100644 --- a/apis/python/tests/test_shape.py +++ b/apis/python/tests/test_shape.py @@ -126,16 +126,6 @@ def test_sparse_nd_array_basics( with tiledbsoma.SparseNDArray.open(uri) as snda: assert snda.shape == arg_shape - # Test resize too big - new_shape = tuple([4_000_000_000 for i in range(ndim)]) - # TODO: check draft spec - # with pytest.raises(ValueError): - with pytest.raises(tiledbsoma.SOMAError): - with 
tiledbsoma.SparseNDArray.open(uri, "w") as snda: - snda.resize(new_shape) - with tiledbsoma.SparseNDArray.open(uri) as snda: - assert snda.shape == arg_shape - # Test writes out of bounds with tiledbsoma.SparseNDArray.open(uri, "w") as snda: with pytest.raises(tiledbsoma.SOMAError): @@ -144,7 +134,7 @@ def test_sparse_nd_array_basics( table = pa.Table.from_pydict(dikt) snda.write(table) - # Test reasonable resize + # Test resize new_shape = tuple([arg_shape[i] + 50 for i in range(ndim)]) with tiledbsoma.SparseNDArray.open(uri, "w") as snda: snda.resize(new_shape) @@ -226,14 +216,14 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names): ] ) - data = pa.Table.from_pydict( - { - "soma_joinid": [0, 1, 2, 3], - "mystring": ["a", "b", "a", "b"], - "myint": [20, 30, 40, 50], - "myfloat": [1.0, 2.5, 4.0, 5.5], - } - ) + data_dict = { + "soma_joinid": [0, 1, 2, 3], + "mystring": ["a", "b", "a", "b"], + "myint": [20, 30, 40, 50], + "myfloat": [1.0, 2.5, 4.0, 5.5], + } + + data = pa.Table.from_pydict(data_dict) domain_slots = { "soma_joinid": soma_joinid_domain, @@ -242,6 +232,8 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names): "myfloat": (-999.5, 999.5), } + has_soma_joinid_dim = "soma_joinid" in index_column_names + domain = tuple([domain_slots[name] for name in index_column_names]) soma_joinid_coords = data["soma_joinid"] @@ -275,3 +267,43 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names): assert sdf._maybe_soma_joinid_maxshape is None assert len(sdf.non_empty_domain()) == len(index_column_names) + + # This may be None if soma_joinid is not an index column + shape_at_create = sdf._maybe_soma_joinid_shape + + if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED: + + # Test resize down + new_shape = 0 + with tiledbsoma.DataFrame.open(uri, "w") as sdf: + if has_soma_joinid_dim: + # TODO: check draft spec + # with pytest.raises(ValueError): + with pytest.raises(tiledbsoma.SOMAError): + 
sdf.resize_soma_joinid(new_shape) + else: + sdf.resize_soma_joinid(new_shape) + + with tiledbsoma.DataFrame.open(uri) as sdf: + assert sdf._maybe_soma_joinid_shape == shape_at_create + + # Test writes out of bounds, before resize + offset = shape_at_create if has_soma_joinid_dim else 100 + data_dict["soma_joinid"] = [e + offset for e in data_dict["soma_joinid"]] + data = pa.Table.from_pydict(data_dict) + + with tiledbsoma.DataFrame.open(uri, "w") as sdf: + if has_soma_joinid_dim: + with pytest.raises(tiledbsoma.SOMAError): + sdf.write(data) + else: + sdf.write(data) + + # Test resize + new_shape = 0 if shape_at_create is None else shape_at_create + 100 + with tiledbsoma.DataFrame.open(uri, "w") as sdf: + sdf.resize_soma_joinid(new_shape) + + # Test writes out of old bounds, within new bounds, after resize + with tiledbsoma.DataFrame.open(uri, "w") as sdf: + sdf.write(data) diff --git a/apis/r/DESCRIPTION b/apis/r/DESCRIPTION index 68f13ef250..8a2eac0540 100644 --- a/apis/r/DESCRIPTION +++ b/apis/r/DESCRIPTION @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices, like those commonly used for single cell data analysis. It is documented at ; a formal specification available is at . 
-Version: 1.14.99.1 +Version: 1.14.99.2 Authors@R: c( person(given = "Aaron", family = "Wolen", role = c("cre", "aut"), email = "aaron@tiledb.com", diff --git a/apis/r/NEWS.md b/apis/r/NEWS.md index a24e892df0..b2ccab779b 100644 --- a/apis/r/NEWS.md +++ b/apis/r/NEWS.md @@ -2,6 +2,7 @@ ## Changes +* Implement missing `domain` argument to `SOMADataFrame` `create` [#3032](https://github.com/single-cell-data/TileDB-SOMA/pull/3032) * Remove unused `fragment_count` accessor [#3054](https://github.com/single-cell-data/TileDB-SOMA/pull/3054) # tiledbsoma 1.14.1 diff --git a/apis/r/R/Factory.R b/apis/r/R/Factory.R index 7a4f27be34..d119a1cca1 100644 --- a/apis/r/R/Factory.R +++ b/apis/r/R/Factory.R @@ -8,6 +8,14 @@ #' @param index_column_names A vector of column names to use as user-defined #' index columns; all named columns must exist in the schema, and at least #' one index column name is required +#' @param domain An optional list of 2-element vectors specifying the domain of each index +#' column. Each vector should be a pair consisting of the minimum and maximum values storable in +#' the index column. For example, if there is a single int64-valued index column, then `domain` +#' might be `c(100, 200)` to indicate that values between 100 and 200, inclusive, can be stored +#' in that column. If provided, this list must have the same length as `index_column_names`, +#' and the index-column domain will be as specified. If omitted entirely, or if `NULL` in a given +#' dimension, the corresponding index-column domain will use the minimum and maximum possible +#' values for the column's datatype. This makes a `DataFrame` growable. 
#' @param ingest_mode Ingestion mode when creating the TileDB object; choose from: #' \itemize{ #' \item \dQuote{\code{write}}: create a new TileDB object and error if it already exists @@ -24,6 +32,7 @@ SOMADataFrameCreate <- function( uri, schema, index_column_names = c("soma_joinid"), + domain = NULL, ingest_mode = c("write", "resume"), platform_config = NULL, tiledbsoma_ctx = NULL, @@ -50,6 +59,7 @@ SOMADataFrameCreate <- function( sdf$create( schema, index_column_names = index_column_names, + domain = domain, platform_config = platform_config, internal_use_only = "allowed_use" ) diff --git a/apis/r/R/RcppExports.R b/apis/r/R/RcppExports.R index 326c898385..c1d74177a6 100644 --- a/apis/r/R/RcppExports.R +++ b/apis/r/R/RcppExports.R @@ -218,6 +218,10 @@ resize <- function(uri, new_shape, ctxxp) { invisible(.Call(`_tiledbsoma_resize`, uri, new_shape, ctxxp)) } +resize_soma_joinid <- function(uri, new_shape, ctxxp) { + invisible(.Call(`_tiledbsoma_resize_soma_joinid`, uri, new_shape, ctxxp)) +} + tiledbsoma_upgrade_shape <- function(uri, new_shape, ctxxp) { invisible(.Call(`_tiledbsoma_tiledbsoma_upgrade_shape`, uri, new_shape, ctxxp)) } diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index 803468a601..0a8c669c99 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -20,12 +20,21 @@ SOMADataFrame <- R6::R6Class( #' @param index_column_names A vector of column names to use as user-defined #' index columns. All named columns must exist in the schema, and at least #' one index column name is required. + #' @param domain An optional list of 2-element vectors specifying the domain of each index + #' column. Each vector should be a pair consisting of the minimum and maximum values storable in + #' the index column. For example, if there is a single int64-valued index column, then `domain` + #' might be `c(100, 200)` to indicate that values between 100 and 200, inclusive, can be stored + #' in that column. 
If provided, this list must have the same length as `index_column_names`, + #' and the index-column domain will be as specified. If omitted entirely, or if `NULL` in a given + #' dimension, the corresponding index-column domain will use the minimum and maximum possible + #' values for the column's datatype. This makes a `DataFrame` growable. #' @template param-platform-config #' @param internal_use_only Character value to signal this is a 'permitted' call, #' as `create()` is considered internal and should not be called directly. create = function( schema, index_column_names = c("soma_joinid"), + domain = NULL, platform_config = NULL, internal_use_only = NULL ) { @@ -35,6 +44,20 @@ SOMADataFrame <- R6::R6Class( } schema <- private$validate_schema(schema, index_column_names) + stopifnot( + "domain must be NULL or a named list, with values being 2-element vectors or NULL" = is.null(domain) || + ( # Check that `domain` is a list of length `length(index_column_names)` + # where all values are named after `index_column_names` + # and all values are `NULL` or a two-length atomic non-factor vector + rlang::is_list(domain, n = length(index_column_names)) && + identical(sort(names(domain)), sort(index_column_names)) && + all(vapply_lgl( + domain, + function(x) is.null(x) || (is.atomic(x) && !is.factor(x) && length(x) == 2L) + )) + ) + ) + attr_column_names <- setdiff(schema$names, index_column_names) stopifnot("At least one non-index column must be defined in the schema" = length(attr_column_names) > 0) @@ -43,12 +66,14 @@ SOMADataFrame <- R6::R6Class( # typed, queryable data structure. tiledb_create_options <- TileDBCreateOptions$new(platform_config) - ## we (currently pass domain and extent values in an arrow table (i.e. data.frame alike) - ## where each dimension is one column (of the same type as in the schema) followed by three - ## values for the domain pair and the extent + # We currently pass domain and extent values in an arrow table (i.e. 
data.frame alike) + # where each dimension is one column (of the same type as in the schema followed by: + # * Before the new shape feature: three values for the domain pair and the extent; + # * After the new shape feature: five values for the maxdomain pair, extent, and domain. dom_ext_tbl <- get_domain_and_extent_dataframe( schema, ind_col_names = index_column_names, + domain = domain, tdco = tiledb_create_options ) @@ -370,6 +395,26 @@ SOMADataFrame <- R6::R6Class( #' @return Logical tiledbsoma_has_upgraded_domain = function() { has_current_domain(self$uri, private$.soma_context) + }, + + #' @description Increases the shape of the dataframe on the ``soma_joinid`` + #' index column, if it indeed is an index column, leaving all other index + #' columns as-is. If the ``soma_joinid`` is not an index column, no change is + #' made. This is a special case of ``upgrade_domain`` (WIP for 1.15), but + #' simpler to keystroke, and handles the most common case for dataframe + #' domain expansion. Raises an error if the dataframe doesn't already have a + #' domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for + #' 1.15). + #' @param new_shape An integer, greater than or equal to 1 + the + #' `soma_joinid` domain slot. 
+ #' @return No return value + resize_soma_joinid = function(new_shape) { + + stopifnot("'new_shape' must be an integer" = rlang::is_integerish(new_shape, n = 1) || + (bit64::is.integer64(new_shape) && length(new_shape) == 1) + ) + # Checking slotwise new shape >= old shape, and <= max_shape, is already done in libtiledbsoma + invisible(resize_soma_joinid(self$uri, new_shape, private$.soma_context)) } ), diff --git a/apis/r/R/utils-arrow.R b/apis/r/R/utils-arrow.R index b691579723..a7d3d4df3e 100644 --- a/apis/r/R/utils-arrow.R +++ b/apis/r/R/utils-arrow.R @@ -348,7 +348,7 @@ extract_levels <- function(arrtbl, exclude_cols=c("soma_joinid")) { #' Domain and extent table creation helper for data.frame writes returning a Table with #' a column per dimension for the given (incoming) arrow schema of a Table #' @noRd -get_domain_and_extent_dataframe <- function(tbl_schema, ind_col_names, +get_domain_and_extent_dataframe <- function(tbl_schema, ind_col_names, domain = NULL, tdco = TileDBCreateOptions$new(PlatformConfig$new())) { stopifnot("First argument must be an arrow schema" = inherits(tbl_schema, "Schema"), "Second argument must be character" = is.character(ind_col_names), @@ -356,13 +356,25 @@ get_domain_and_extent_dataframe <- function(tbl_schema, ind_col_names, "Second argument index names must be columns in first argument" = all(is.finite(match(ind_col_names, names(tbl_schema)))), "Third argument must be options wrapper" = inherits(tdco, "TileDBCreateOptions")) + stopifnot( + "domain must be NULL or a named list, with values being 2-element vectors or NULL" = is.null(domain) || + ( # Check that `domain` is a list of length `length(ind_col_names)` + # where all values are named after `ind_col_names` + # and all values are `NULL` or a two-length atomic non-factor vector + rlang::is_list(domain, n = length(ind_col_names)) && + identical(sort(names(domain)), sort(ind_col_names)) && + all(vapply_lgl( + domain, + function(x) is.null(x) || (is.atomic(x) && !is.factor(x) 
&& length(x) == 2L) + )) + ) + ) + rl <- sapply(ind_col_names, \(ind_col_name) { ind_col <- tbl_schema$GetFieldByName(ind_col_name) ind_col_type <- ind_col$type ind_col_type_name <- ind_col$type$name - # TODO: tiledbsoma-r does not accept the domain argument to SOMADataFrame::create, but should - # https://github.com/single-cell-data/TileDB-SOMA/issues/2967 ind_ext <- tdco$dim_tile(ind_col_name) # Default 2048 mods to 0 for 8-bit types and 0 is an invalid extent @@ -384,21 +396,59 @@ get_domain_and_extent_dataframe <- function(tbl_schema, ind_col_names, ind_max_dom <- arrow_type_range(ind_col_type) - c(0, ind_ext) } - ind_cur_dom <- ind_max_dom + requested_slot <- domain[[ind_col_name]] + ind_cur_dom <- if (is.null(requested_slot)) { + ind_max_dom + } else { + requested_slot + } + # Core supports no domain specification for variable-length dims, which + # includes string/binary dims. if (ind_col_type_name %in% c("string", "large_utf8", "utf8")) ind_ext <- NA # https://github.com/single-cell-data/TileDB-SOMA/issues/2407 if (.new_shape_feature_flag_is_enabled()) { if (ind_col_type_name %in% c("string", "utf8", "large_utf8")) { - aa <- arrow::arrow_array(c("", "", "", "", ""), ind_col_type) + aa <- if (is.null(requested_slot)) { + arrow::arrow_array(c("", "", "", "", ""), ind_col_type) + } else { + arrow::arrow_array(c("", "", "", requested_slot[[1]], requested_slot[[2]]), ind_col_type) + } } else { + # If they wanted (0, 99) then extent must be at most 100. + # This is tricky though. Some cases: + # * lo = 0, hi = 99, extent = 1000 + # We look at hi - lo + 1; resize extent down to 100 + # * lo = 1000, hi = 1099, extent = 1000 + # We look at hi - lo + 1; resize extent down to 100 + # * lo = min for datatype, hi = max for datatype + # We get integer overflow trying to compute hi - lo + 1 + # So if lo <= 0 and hi >= ind_ext, this is fine without + # computing hi - lo + 1. 
+ lo <- ind_max_dom[[1]] + hi <- ind_max_dom[[2]] + if (lo > 0 || hi < ind_ext) { + dom_span <- hi - lo + 1 + if (ind_ext > dom_span) { + ind_ext <- dom_span + } + } aa <- arrow::arrow_array(c(ind_max_dom, ind_ext, ind_cur_dom), ind_col_type) } } else { if (ind_col_type_name %in% c("string", "utf8", "large_utf8")) { aa <- arrow::arrow_array(c("", "", ""), ind_col_type) } else { - aa <- arrow::arrow_array(c(ind_max_dom, ind_ext), ind_col_type) + # Same comments as above + lo <- ind_cur_dom[[1]] + hi <- ind_cur_dom[[2]] + if (lo > 0 || hi < ind_ext) { + dom_span <- hi - lo + 1 + if (ind_ext > dom_span) { + ind_ext <- dom_span + } + } + aa <- arrow::arrow_array(c(ind_cur_dom, ind_ext), ind_col_type) } } diff --git a/apis/r/R/utils.R b/apis/r/R/utils.R index dec2f8aa38..61bfa2fca7 100644 --- a/apis/r/R/utils.R +++ b/apis/r/R/utils.R @@ -219,7 +219,7 @@ read_only_error <- function(field_name) { SOMA_OBJECT_TYPE_METADATA_KEY <- "soma_object_type" SOMA_ENCODING_VERSION_METADATA_KEY <- "soma_encoding_version" -SOMA_ENCODING_VERSION <- "1" +SOMA_ENCODING_VERSION <- "1.1.0" #' @importFrom Matrix as.matrix #' @importFrom arrow RecordBatch diff --git a/apis/r/R/write_soma.R b/apis/r/R/write_soma.R index e1433458d6..3287b619d5 100644 --- a/apis/r/R/write_soma.R +++ b/apis/r/R/write_soma.R @@ -219,12 +219,31 @@ write_soma.data.frame <- function( choices = names(x), several.ok = TRUE ) + + # For index_column_name being soma_joinid -- this being the default + # -- set that domain slot to match the data. This will endow the + # dataframe with something users think of as a "shape". For the + # other slots, set the domain wide open. 
+ # + domain <- NULL + if (.new_shape_feature_flag_is_enabled()) { + domain <- list() + for (index_column_name in index_column_names) { + if (index_column_name == "soma_joinid") { + domain[["soma_joinid"]] <- c(0, nrow(x) - 1) + } else { + domain[[index_column_name]] <- NULL + } + } + } + # Create the SOMADataFrame tbl <- arrow::arrow_table(x) sdf <- SOMADataFrameCreate( uri = uri, schema = tbl$schema, index_column_names = index_column_names, + domain = domain, ingest_mode = ingest_mode, platform_config = platform_config, tiledbsoma_ctx = tiledbsoma_ctx @@ -239,6 +258,9 @@ write_soma.data.frame <- function( NULL } } + if (ingest_mode %in% c('resume') && sdf$tiledbsoma_has_upgraded_domain()) { + sdf$resize_soma_joinid(nrow(x)) + } if (!is.null(tbl)) { sdf$write(tbl) } diff --git a/apis/r/man/BlockwiseReadIterBase.Rd b/apis/r/man/BlockwiseReadIterBase.Rd index af3571eca4..7d406e09cb 100644 --- a/apis/r/man/BlockwiseReadIterBase.Rd +++ b/apis/r/man/BlockwiseReadIterBase.Rd @@ -8,7 +8,7 @@ Class that allows for blockwise read iteration of SOMA reads } \keyword{internal} \section{Super class}{ -\code{tiledbsoma::ReadIter} -> \code{BlockwiseReadIterBase} +\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{BlockwiseReadIterBase} } \section{Active bindings}{ \if{html}{\out{
}} diff --git a/apis/r/man/BlockwiseSparseReadIter.Rd b/apis/r/man/BlockwiseSparseReadIter.Rd index c4aad28fc0..1906a3e9ff 100644 --- a/apis/r/man/BlockwiseSparseReadIter.Rd +++ b/apis/r/man/BlockwiseSparseReadIter.Rd @@ -9,7 +9,7 @@ as sparse matrices } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::ReadIter} -> \code{tiledbsoma::BlockwiseReadIterBase} -> \code{BlockwiseSparseReadIter} +\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{\link[tiledbsoma:BlockwiseReadIterBase]{tiledbsoma::BlockwiseReadIterBase}} -> \code{BlockwiseSparseReadIter} } \section{Active bindings}{ \if{html}{\out{
}} diff --git a/apis/r/man/BlockwiseTableReadIter.Rd b/apis/r/man/BlockwiseTableReadIter.Rd index f83b1f902d..72db146d0d 100644 --- a/apis/r/man/BlockwiseTableReadIter.Rd +++ b/apis/r/man/BlockwiseTableReadIter.Rd @@ -9,7 +9,7 @@ as Arrow \code{\link[Arrow]{Table}s} } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::ReadIter} -> \code{tiledbsoma::BlockwiseReadIterBase} -> \code{BlockwiseTableReadIter} +\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{\link[tiledbsoma:BlockwiseReadIterBase]{tiledbsoma::BlockwiseReadIterBase}} -> \code{BlockwiseTableReadIter} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/ConfigList.Rd b/apis/r/man/ConfigList.Rd index 7e9c9dcf84..52ba1f9a1d 100644 --- a/apis/r/man/ConfigList.Rd +++ b/apis/r/man/ConfigList.Rd @@ -10,7 +10,7 @@ Essentially, serves as a nested map where the inner map is a \code{\{: \link[tiledbsoma:ScalarMap]{\{: \}}\}} } \section{Super class}{ -\code{tiledbsoma::MappingBase} -> \code{ConfigList} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{ConfigList} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/EphemeralCollection.Rd b/apis/r/man/EphemeralCollection.Rd index e56ab379a8..6d512dce50 100644 --- a/apis/r/man/EphemeralCollection.Rd +++ b/apis/r/man/EphemeralCollection.Rd @@ -8,7 +8,7 @@ } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{tiledbsoma::EphemeralCollectionBase} -> \code{EphemeralCollection} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{\link[tiledbsoma:EphemeralCollectionBase]{tiledbsoma::EphemeralCollectionBase}} -> \code{EphemeralCollection} } \section{Active bindings}{ \if{html}{\out{
}} diff --git a/apis/r/man/EphemeralCollectionBase.Rd b/apis/r/man/EphemeralCollectionBase.Rd index a0ad8235e5..f482e6f610 100644 --- a/apis/r/man/EphemeralCollectionBase.Rd +++ b/apis/r/man/EphemeralCollectionBase.Rd @@ -8,7 +8,7 @@ } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{EphemeralCollectionBase} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{EphemeralCollectionBase} } \section{Active bindings}{ \if{html}{\out{
}} diff --git a/apis/r/man/EphemeralExperiment.Rd b/apis/r/man/EphemeralExperiment.Rd index 37b2a2f21e..55efa0484e 100644 --- a/apis/r/man/EphemeralExperiment.Rd +++ b/apis/r/man/EphemeralExperiment.Rd @@ -8,7 +8,7 @@ } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{tiledbsoma::EphemeralCollectionBase} -> \code{EphemeralExperiment} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{\link[tiledbsoma:EphemeralCollectionBase]{tiledbsoma::EphemeralCollectionBase}} -> \code{EphemeralExperiment} } \section{Active bindings}{ \if{html}{\out{
}} diff --git a/apis/r/man/EphemeralMeasurement.Rd b/apis/r/man/EphemeralMeasurement.Rd index 66e5cae834..7881771d62 100644 --- a/apis/r/man/EphemeralMeasurement.Rd +++ b/apis/r/man/EphemeralMeasurement.Rd @@ -8,7 +8,7 @@ } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{tiledbsoma::EphemeralCollectionBase} -> \code{EphemeralMeasurement} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{\link[tiledbsoma:EphemeralCollectionBase]{tiledbsoma::EphemeralCollectionBase}} -> \code{EphemeralMeasurement} } \section{Active bindings}{ \if{html}{\out{
}} diff --git a/apis/r/man/PlatformConfig.Rd b/apis/r/man/PlatformConfig.Rd index b8a7b426d8..af1fc2558c 100644 --- a/apis/r/man/PlatformConfig.Rd +++ b/apis/r/man/PlatformConfig.Rd @@ -11,7 +11,7 @@ map is a \code{\link{ScalarMap}} contained within a \code{\link{ConfigList}} \code{\{platform: \{param: \{key: value\}\}\}} } \section{Super class}{ -\code{tiledbsoma::MappingBase} -> \code{PlatformConfig} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{PlatformConfig} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/SOMAArrayBase.Rd b/apis/r/man/SOMAArrayBase.Rd index dc854adc48..61885f1c9d 100644 --- a/apis/r/man/SOMAArrayBase.Rd +++ b/apis/r/man/SOMAArrayBase.Rd @@ -9,7 +9,7 @@ experimental) } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{SOMAArrayBase} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{SOMAArrayBase} } \section{Active bindings}{ \if{html}{\out{
}} @@ -44,6 +44,7 @@ experimental)
  • tiledbsoma::TileDBArray$maxshape()
  • tiledbsoma::TileDBArray$ndim()
  • tiledbsoma::TileDBArray$non_empty_domain()
  • +
  • tiledbsoma::TileDBArray$non_empty_domain_new()
  • tiledbsoma::TileDBArray$open()
  • tiledbsoma::TileDBArray$print()
  • tiledbsoma::TileDBArray$schema()
  • diff --git a/apis/r/man/SOMACollection.Rd b/apis/r/man/SOMACollection.Rd index b3677aac28..cef328f481 100644 --- a/apis/r/man/SOMACollection.Rd +++ b/apis/r/man/SOMACollection.Rd @@ -10,7 +10,7 @@ the values are any SOMA-defined foundational or composed type, including \code{\link{SOMASparseNDArray}}, or \code{\link{SOMAExperiment}}. (lifecycle: maturing) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{SOMACollection} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{SOMACollection} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/SOMACollectionBase.Rd b/apis/r/man/SOMACollectionBase.Rd index d715e90351..aa0039cbad 100644 --- a/apis/r/man/SOMACollectionBase.Rd +++ b/apis/r/man/SOMACollectionBase.Rd @@ -12,7 +12,7 @@ objects, mapping string keys to any SOMA object. (lifecycle: maturing) } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{SOMACollectionBase} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{SOMACollectionBase} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SOMAContextBase.Rd b/apis/r/man/SOMAContextBase.Rd index f2dae9522f..60e5a90233 100644 --- a/apis/r/man/SOMAContextBase.Rd +++ b/apis/r/man/SOMAContextBase.Rd @@ -10,7 +10,7 @@ context options } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::MappingBase} -> \code{tiledbsoma::ScalarMap} -> \code{SOMAContextBase} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{\link[tiledbsoma:ScalarMap]{tiledbsoma::ScalarMap}} -> \code{SOMAContextBase} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/SOMADataFrame.Rd b/apis/r/man/SOMADataFrame.Rd index 70aba0c64c..a66f17affd 100644 --- a/apis/r/man/SOMADataFrame.Rd +++ b/apis/r/man/SOMADataFrame.Rd @@ -10,7 +10,7 @@ row and is intended to act as a join key for other objects, such as \code{\link{SOMASparseNDArray}}. (lifecycle: maturing) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{tiledbsoma::SOMAArrayBase} -> \code{SOMADataFrame} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{\link[tiledbsoma:SOMAArrayBase]{tiledbsoma::SOMAArrayBase}} -> \code{SOMADataFrame} } \section{Methods}{ \subsection{Public methods}{ @@ -21,6 +21,8 @@ row and is intended to act as a join key for other objects, such as \item \href{#method-SOMADataFrame-update}{\code{SOMADataFrame$update()}} \item \href{#method-SOMADataFrame-shape}{\code{SOMADataFrame$shape()}} \item \href{#method-SOMADataFrame-maxshape}{\code{SOMADataFrame$maxshape()}} +\item \href{#method-SOMADataFrame-domain}{\code{SOMADataFrame$domain()}} +\item \href{#method-SOMADataFrame-maxdomain}{\code{SOMADataFrame$maxdomain()}} \item \href{#method-SOMADataFrame-tiledbsoma_has_upgraded_domain}{\code{SOMADataFrame$tiledbsoma_has_upgraded_domain()}} \item \href{#method-SOMADataFrame-clone}{\code{SOMADataFrame$clone()}} } @@ -44,6 +46,7 @@ row and is intended to 
act as a join key for other objects, such as
  • tiledbsoma::TileDBArray$index_column_names()
  • tiledbsoma::TileDBArray$ndim()
  • tiledbsoma::TileDBArray$non_empty_domain()
  • +
  • tiledbsoma::TileDBArray$non_empty_domain_new()
  • tiledbsoma::TileDBArray$open()
  • tiledbsoma::TileDBArray$print()
  • tiledbsoma::TileDBArray$schema()
  • @@ -63,6 +66,7 @@ Create (lifecycle: maturing) \if{html}{\out{
    }}\preformatted{SOMADataFrame$create( schema, index_column_names = c("soma_joinid"), + domain = NULL, platform_config = NULL, internal_use_only = NULL )}\if{html}{\out{
    }} @@ -77,6 +81,15 @@ Create (lifecycle: maturing) index columns. All named columns must exist in the schema, and at least one index column name is required.} +\item{\code{domain}}{An optional list of 2-element vectors specifying the domain of each index +column. Each vector should be a pair consisting of the minimum and maximum values storable in +the index column. For example, if there is a single int64-valued index column, then \code{domain} +might be \code{list(soma_joinid = c(100, 200))} to indicate that values between 100 and 200, inclusive, can be stored +in that column. If provided, this list must have the same length as \code{index_column_names}, +and the index-column domain will be as specified. If omitted entirely, or if \code{NULL} in a given +dimension, the corresponding index-column domain will use the minimum and maximum possible +values for the column's datatype. This makes a \code{DataFrame} growable.} + \item{\code{platform_config}}{A \link[tiledbsoma:PlatformConfig]{platform configuration} object} @@ -219,6 +232,38 @@ None, instead a \code{\link{.NotYetImplemented}()} error is raised } } \if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMADataFrame-domain}{}}} +\subsection{Method \code{domain()}}{ +Returns a named list of minimum/maximum pairs, one per index +column, currently storable on each index column of the dataframe. These +can be resized up to \code{maxdomain}. +(lifecycle: maturing) +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SOMADataFrame$domain()}\if{html}{\out{
    }} +} + +\subsection{Returns}{ +Named list of minimum/maximum values. +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMADataFrame-maxdomain}{}}} +\subsection{Method \code{maxdomain()}}{ +Returns a named list of minimum/maximum pairs, one per index +column, which are the limits up to which the dataframe can have its +domain resized. +(lifecycle: maturing) +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SOMADataFrame$maxdomain()}\if{html}{\out{
    }} +} + +\subsection{Returns}{ +Named list of minimum/maximum values. +} +} +\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-SOMADataFrame-tiledbsoma_has_upgraded_domain}{}}} \subsection{Method \code{tiledbsoma_has_upgraded_domain()}}{ diff --git a/apis/r/man/SOMADataFrameCreate.Rd b/apis/r/man/SOMADataFrameCreate.Rd index 36a6cfbdbd..94b6ed5fe2 100644 --- a/apis/r/man/SOMADataFrameCreate.Rd +++ b/apis/r/man/SOMADataFrameCreate.Rd @@ -8,6 +8,7 @@ SOMADataFrameCreate( uri, schema, index_column_names = c("soma_joinid"), + domain = NULL, ingest_mode = c("write", "resume"), platform_config = NULL, tiledbsoma_ctx = NULL, @@ -24,6 +25,15 @@ SOMADataFrameCreate( index columns; all named columns must exist in the schema, and at least one index column name is required} +\item{domain}{An optional list of 2-element vectors specifying the domain of each index +column. Each vector should be a pair consisting of the minimum and maximum values storable in +the index column. For example, if there is a single int64-valued index column, then \code{domain} +might be \code{list(soma_joinid = c(100, 200))} to indicate that values between 100 and 200, inclusive, can be stored +in that column. If provided, this list must have the same length as \code{index_column_names}, +and the index-column domain will be as specified. If omitted entirely, or if \code{NULL} in a given +dimension, the corresponding index-column domain will use the minimum and maximum possible +values for the column's datatype. This makes a \code{DataFrame} growable.} + \item{ingest_mode}{Ingestion mode when creating the TileDB object; choose from: \itemize{ \item \dQuote{\code{write}}: create a new TileDB object and error if it already exists diff --git a/apis/r/man/SOMADenseNDArray.Rd b/apis/r/man/SOMADenseNDArray.Rd index ea74350990..b57740ed72 100644 --- a/apis/r/man/SOMADenseNDArray.Rd +++ b/apis/r/man/SOMADenseNDArray.Rd @@ -25,7 +25,7 @@ The \code{write} method is currently limited to writing from 2-d matrices. 
(lifecycle: maturing) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{tiledbsoma::SOMAArrayBase} -> \code{tiledbsoma::SOMANDArrayBase} -> \code{SOMADenseNDArray} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{\link[tiledbsoma:SOMAArrayBase]{tiledbsoma::SOMAArrayBase}} -> \code{\link[tiledbsoma:SOMANDArrayBase]{tiledbsoma::SOMANDArrayBase}} -> \code{SOMADenseNDArray} } \section{Methods}{ \subsection{Public methods}{ @@ -56,6 +56,7 @@ The \code{write} method is currently limited to writing from 2-d matrices.
  • tiledbsoma::TileDBArray$maxshape()
  • tiledbsoma::TileDBArray$ndim()
  • tiledbsoma::TileDBArray$non_empty_domain()
  • +
  • tiledbsoma::TileDBArray$non_empty_domain_new()
  • tiledbsoma::TileDBArray$open()
  • tiledbsoma::TileDBArray$print()
  • tiledbsoma::TileDBArray$schema()
  • diff --git a/apis/r/man/SOMAExperiment.Rd b/apis/r/man/SOMAExperiment.Rd index 47698f979c..ab51a1fa34 100644 --- a/apis/r/man/SOMAExperiment.Rd +++ b/apis/r/man/SOMAExperiment.Rd @@ -23,7 +23,7 @@ default platform configuration by passing a custom configuration to the } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{SOMAExperiment} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{SOMAExperiment} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SOMAMeasurement.Rd b/apis/r/man/SOMAMeasurement.Rd index f502f39423..f642b0c927 100644 --- a/apis/r/man/SOMAMeasurement.Rd +++ b/apis/r/man/SOMAMeasurement.Rd @@ -23,7 +23,7 @@ default platform configuration by passing a custom configuration to the } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{SOMAMeasurement} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{SOMAMeasurement} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SOMANDArrayBase.Rd b/apis/r/man/SOMANDArrayBase.Rd index 7cf691f5c8..31e85127c2 100644 --- a/apis/r/man/SOMANDArrayBase.Rd +++ b/apis/r/man/SOMANDArrayBase.Rd @@ -9,7 +9,7 @@ Adds NDArray-specific functionality to the \code{\link{SOMAArrayBase}} class. } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{tiledbsoma::SOMAArrayBase} -> \code{SOMANDArrayBase} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{\link[tiledbsoma:SOMAArrayBase]{tiledbsoma::SOMAArrayBase}} -> \code{SOMANDArrayBase} } \section{Methods}{ \subsection{Public methods}{ @@ -40,6 +40,7 @@ Adds NDArray-specific functionality to the \code{\link{SOMAArrayBase}} class.
  • tiledbsoma::TileDBArray$maxshape()
  • tiledbsoma::TileDBArray$ndim()
  • tiledbsoma::TileDBArray$non_empty_domain()
  • +
  • tiledbsoma::TileDBArray$non_empty_domain_new()
  • tiledbsoma::TileDBArray$open()
  • tiledbsoma::TileDBArray$print()
  • tiledbsoma::TileDBArray$schema()
  • diff --git a/apis/r/man/SOMASparseNDArray.Rd b/apis/r/man/SOMASparseNDArray.Rd index c6c605df0f..bd9a0f810b 100644 --- a/apis/r/man/SOMASparseNDArray.Rd +++ b/apis/r/man/SOMASparseNDArray.Rd @@ -23,7 +23,7 @@ the object are overwritten and new index values are added. (lifecycle: maturing) } } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{tiledbsoma::SOMAArrayBase} -> \code{tiledbsoma::SOMANDArrayBase} -> \code{SOMASparseNDArray} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{\link[tiledbsoma:SOMAArrayBase]{tiledbsoma::SOMAArrayBase}} -> \code{\link[tiledbsoma:SOMANDArrayBase]{tiledbsoma::SOMANDArrayBase}} -> \code{SOMASparseNDArray} } \section{Methods}{ \subsection{Public methods}{ @@ -56,6 +56,7 @@ the object are overwritten and new index values are added. (lifecycle: maturing)
  • tiledbsoma::TileDBArray$maxshape()
  • tiledbsoma::TileDBArray$ndim()
  • tiledbsoma::TileDBArray$non_empty_domain()
  • +
  • tiledbsoma::TileDBArray$non_empty_domain_new()
  • tiledbsoma::TileDBArray$open()
  • tiledbsoma::TileDBArray$print()
  • tiledbsoma::TileDBArray$schema()
  • diff --git a/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd b/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd index b972bab4be..771ea59fe7 100644 --- a/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd +++ b/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd @@ -8,7 +8,7 @@ Blockwise reader for \code{\link{SOMASparseNDArray}} } \keyword{internal} \section{Super class}{ -\code{tiledbsoma::SOMASparseNDArrayReadBase} -> \code{SOMASparseNDArrayBlockwiseRead} +\code{\link[tiledbsoma:SOMASparseNDArrayReadBase]{tiledbsoma::SOMASparseNDArrayReadBase}} -> \code{SOMASparseNDArrayBlockwiseRead} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SOMASparseNDArrayRead.Rd b/apis/r/man/SOMASparseNDArrayRead.Rd index 049c07c548..dde58be7e7 100644 --- a/apis/r/man/SOMASparseNDArrayRead.Rd +++ b/apis/r/man/SOMASparseNDArrayRead.Rd @@ -8,7 +8,7 @@ Intermediate type to choose result format when reading a sparse array } \keyword{internal} \section{Super class}{ -\code{tiledbsoma::SOMASparseNDArrayReadBase} -> \code{SOMASparseNDArrayRead} +\code{\link[tiledbsoma:SOMASparseNDArrayReadBase]{tiledbsoma::SOMASparseNDArrayReadBase}} -> \code{SOMASparseNDArrayRead} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/SOMATileDBContext.Rd b/apis/r/man/SOMATileDBContext.Rd index ed757e072f..fb18714cd9 100644 --- a/apis/r/man/SOMATileDBContext.Rd +++ b/apis/r/man/SOMATileDBContext.Rd @@ -7,7 +7,7 @@ Context map for TileDB-backed SOMA objects } \section{Super classes}{ -\code{tiledbsoma::MappingBase} -> \code{tiledbsoma::ScalarMap} -> \code{tiledbsoma::SOMAContextBase} -> \code{SOMATileDBContext} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{\link[tiledbsoma:ScalarMap]{tiledbsoma::ScalarMap}} -> \code{\link[tiledbsoma:SOMAContextBase]{tiledbsoma::SOMAContextBase}} -> \code{SOMATileDBContext} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/ScalarMap.Rd b/apis/r/man/ScalarMap.Rd index 05adb64655..a02837c47b 100644 --- a/apis/r/man/ScalarMap.Rd +++ b/apis/r/man/ScalarMap.Rd @@ -10,7 +10,7 @@ optionally be limited further to a specific atomic vector type } \keyword{internal} \section{Super class}{ -\code{tiledbsoma::MappingBase} -> \code{ScalarMap} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{ScalarMap} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SparseReadIter.Rd b/apis/r/man/SparseReadIter.Rd index 84951e20cd..adbe82bfbb 100644 --- a/apis/r/man/SparseReadIter.Rd +++ b/apis/r/man/SparseReadIter.Rd @@ -9,7 +9,7 @@ a reads on \link{SOMASparseNDArray}. Iteration chunks are retrieved as 0-based Views \link{matrixZeroBasedView} of Matrix::\link[Matrix]{sparseMatrix}. } \section{Super class}{ -\code{tiledbsoma::ReadIter} -> \code{SparseReadIter} +\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{SparseReadIter} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/TableReadIter.Rd b/apis/r/man/TableReadIter.Rd index 0699b8dc8a..2dfd2cad17 100644 --- a/apis/r/man/TableReadIter.Rd +++ b/apis/r/man/TableReadIter.Rd @@ -9,7 +9,7 @@ a reads on \link{SOMASparseNDArray} and \link{SOMADataFrame}. Iteration chunks are retrieved as arrow::\link[arrow]{Table} } \section{Super class}{ -\code{tiledbsoma::ReadIter} -> \code{TableReadIter} +\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{TableReadIter} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/TileDBArray.Rd b/apis/r/man/TileDBArray.Rd index 423dceeee0..9ef26bdcc7 100644 --- a/apis/r/man/TileDBArray.Rd +++ b/apis/r/man/TileDBArray.Rd @@ -9,7 +9,7 @@ Base class for representing an individual TileDB array. } \keyword{internal} \section{Super class}{ -\code{tiledbsoma::TileDBObject} -> \code{TileDBArray} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{TileDBArray} } \section{Active bindings}{ \if{html}{\out{
    }} @@ -35,6 +35,7 @@ Base class for representing an individual TileDB array. \item \href{#method-TileDBArray-maxshape}{\code{TileDBArray$maxshape()}} \item \href{#method-TileDBArray-used_shape}{\code{TileDBArray$used_shape()}} \item \href{#method-TileDBArray-non_empty_domain}{\code{TileDBArray$non_empty_domain()}} +\item \href{#method-TileDBArray-non_empty_domain_new}{\code{TileDBArray$non_empty_domain_new()}} \item \href{#method-TileDBArray-ndim}{\code{TileDBArray$ndim()}} \item \href{#method-TileDBArray-attributes}{\code{TileDBArray$attributes()}} \item \href{#method-TileDBArray-dimnames}{\code{TileDBArray$dimnames()}} @@ -281,6 +282,26 @@ each dimension. } } \if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TileDBArray-non_empty_domain_new}{}}} +\subsection{Method \code{non_empty_domain_new()}}{ +Returns a named list of minimum/maximum pairs, one per index +column, which are the smallest and largest values written on that +index column. + +As tracked on https://github.com/single-cell-data/TileDB-SOMA/issues/2407 +this will replace the existing \code{non_empty_domain} method. + +(lifecycle: maturing) +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{TileDBArray$non_empty_domain_new()}\if{html}{\out{
    }} +} + +\subsection{Returns}{ +Named list of minimum/maximum values. +} +} +\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TileDBArray-ndim}{}}} \subsection{Method \code{ndim()}}{ diff --git a/apis/r/man/TileDBCreateOptions.Rd b/apis/r/man/TileDBCreateOptions.Rd index 02d2e32ef5..6604d11722 100644 --- a/apis/r/man/TileDBCreateOptions.Rd +++ b/apis/r/man/TileDBCreateOptions.Rd @@ -60,7 +60,7 @@ tdco$attr_filters("non-existant") } \keyword{internal} \section{Super class}{ -\code{tiledbsoma::MappingBase} -> \code{TileDBCreateOptions} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{TileDBCreateOptions} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/TileDBGroup.Rd b/apis/r/man/TileDBGroup.Rd index 1729e2e2ab..0a5cc623c2 100644 --- a/apis/r/man/TileDBGroup.Rd +++ b/apis/r/man/TileDBGroup.Rd @@ -11,7 +11,7 @@ Base class for interacting with TileDB groups (lifecycle: maturing) } \keyword{internal} \section{Super class}{ -\code{tiledbsoma::TileDBObject} -> \code{TileDBGroup} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{TileDBGroup} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/src/RcppExports.cpp b/apis/r/src/RcppExports.cpp index ad953fe658..832bdb4199 100644 --- a/apis/r/src/RcppExports.cpp +++ b/apis/r/src/RcppExports.cpp @@ -488,6 +488,18 @@ BEGIN_RCPP return R_NilValue; END_RCPP } +// resize_soma_joinid +void resize_soma_joinid(const std::string& uri, Rcpp::NumericVector new_shape, Rcpp::XPtr ctxxp); +RcppExport SEXP _tiledbsoma_resize_soma_joinid(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP ctxxpSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type new_shape(new_shapeSEXP); + Rcpp::traits::input_parameter< Rcpp::XPtr >::type ctxxp(ctxxpSEXP); + resize_soma_joinid(uri, new_shape, ctxxp); + return R_NilValue; +END_RCPP +} // tiledbsoma_upgrade_shape void tiledbsoma_upgrade_shape(const std::string& 
uri, Rcpp::NumericVector new_shape, Rcpp::XPtr ctxxp); RcppExport SEXP _tiledbsoma_tiledbsoma_upgrade_shape(SEXP uriSEXP, SEXP new_shapeSEXP, SEXP ctxxpSEXP) { @@ -708,6 +720,7 @@ static const R_CallMethodDef CallEntries[] = { {"_tiledbsoma_ndim", (DL_FUNC) &_tiledbsoma_ndim, 2}, {"_tiledbsoma_c_dimnames", (DL_FUNC) &_tiledbsoma_c_dimnames, 2}, {"_tiledbsoma_resize", (DL_FUNC) &_tiledbsoma_resize, 3}, + {"_tiledbsoma_resize_soma_joinid", (DL_FUNC) &_tiledbsoma_resize_soma_joinid, 3}, {"_tiledbsoma_tiledbsoma_upgrade_shape", (DL_FUNC) &_tiledbsoma_tiledbsoma_upgrade_shape, 3}, {"_tiledbsoma_sr_setup", (DL_FUNC) &_tiledbsoma_sr_setup, 10}, {"_tiledbsoma_sr_complete", (DL_FUNC) &_tiledbsoma_sr_complete, 1}, diff --git a/apis/r/src/rinterface.cpp b/apis/r/src/rinterface.cpp index 12eb0f3d6f..2386db3324 100644 --- a/apis/r/src/rinterface.cpp +++ b/apis/r/src/rinterface.cpp @@ -389,6 +389,18 @@ void resize( sr->close(); } +// [[Rcpp::export]] +void resize_soma_joinid( + const std::string& uri, + Rcpp::NumericVector new_shape, + Rcpp::XPtr ctxxp) { + // This function is solely for SOMADataFrame. 
+ auto sr = tdbs::SOMADataFrame::open(uri, OpenMode::write, ctxxp->ctxptr); + std::vector new_shape_i64 = i64_from_rcpp_numeric(new_shape); + sr->resize_soma_joinid(new_shape_i64[0]); + sr->close(); +} + // [[Rcpp::export]] void tiledbsoma_upgrade_shape( const std::string& uri, diff --git a/apis/r/src/rutilities.cpp b/apis/r/src/rutilities.cpp index efdc0566ed..913d00b6a7 100644 --- a/apis/r/src/rutilities.cpp +++ b/apis/r/src/rutilities.cpp @@ -4,11 +4,9 @@ #define TILEDB_NO_API_DEPRECATION_WARNINGS #endif -#include // for R interface to C++ -#include // for C interface to Arrow (via R package) -#include // for C interface to Arrow -#include // for fromInteger64 -#include +#include // for R interface to C++ +#include // for C interface to Arrow +#include // for C interface to Arrow (via R package) #include #include // for fromInteger64 #include @@ -471,10 +469,29 @@ SEXP convert_domainish(const tdbs::ArrowTable& arrow_table) { "Bad array children alloc"); for (size_t i = 0; i < ncol; i++) { - spdl::info( - "[domainish] name {} length {}", - std::string(arrow_schema->children[i]->name), - arrow_array->children[i]->length); + if (arrow_array->children[i]->n_buffers == 3) { + // Arrow semantics: variable-length: buffers 0,1,2 are validity, + // offsets, data + std::vector + lohi = tiledbsoma::ArrowAdapter::get_array_string_column( + arrow_array->children[i], arrow_schema->children[i]); + spdl::info( + "[domainish] name {} format {} length {} lo {} hi {}", + std::string(arrow_schema->children[i]->name), + std::string(arrow_schema->children[i]->format), + arrow_array->children[i]->length, + lohi[0], + lohi[1]); + } else { + // Arrow semantics: non-variable-length: buffers 0,1 are validity & + // data + spdl::info( + "[domainish] name {} format {} length {}", + std::string(arrow_schema->children[i]->name), + std::string(arrow_schema->children[i]->format), + arrow_array->children[i]->length); + } + ArrowArrayMove(arrow_array->children[i], arr->children[i]); 
ArrowSchemaMove(arrow_schema->children[i], sch->children[i]); } diff --git a/apis/r/tests/testthat/test-shape.R b/apis/r/tests/testthat/test-shape.R index 7526f4d2af..f4959f18d8 100644 --- a/apis/r/tests/testthat/test-shape.R +++ b/apis/r/tests/testthat/test-shape.R @@ -1,108 +1,370 @@ test_that("SOMADataFrame shape", { - #uri <- withr::local_tempdir("soma-dataframe-shape") - uri <- "/tmp/fooze" asch <- create_arrow_schema() index_column_name_choices = list( "soma_joinid", c("soma_joinid", "int_column"), c("soma_joinid", "string_column"), + c("string_column", "int_column"), # duplicate intentional to match `domain_at_create_choices` c("string_column", "int_column") ) - for (i in seq_along(index_column_name_choices)) { - index_column_names <- index_column_name_choices[[i]] - - has_soma_joinid_dim <- "soma_joinid" %in% index_column_names - - if (dir.exists(uri)) unlink(uri, recursive=TRUE) - - # TODO: test create with specified domain on PR 3032 - sdf <- SOMADataFrameCreate(uri, asch, index_column_names = index_column_names) - expect_true(sdf$exists()) - expect_true(dir.exists(uri)) - - tbl0 <- arrow::arrow_table(int_column = 1L:4L, - soma_joinid = 1L:4L, - float_column = 1.1:4.1, - string_column = c("apple", "ball", "cat", "dog"), - schema = asch) - - sdf$write(tbl0) - sdf$close() - - sdf <- SOMADataFrameOpen(uri) - - if (.new_shape_feature_flag_is_enabled()) { - expect_true(sdf$tiledbsoma_has_upgraded_domain()) - } else { - expect_false(sdf$tiledbsoma_has_upgraded_domain()) - } - expect_error(sdf$shape(), class = "notYetImplementedError") - expect_error(sdf$maxshape(), class = "notYetImplementedError") - - # Not implemented this way per - # https://github.com/single-cell-data/TileDB-SOMA/pull/2953#discussion_r1746125089 - # sjid_shape <- sdf$.maybe_soma_joinid_shape() - # sjid_maxshape <- sdf$.maybe_soma_joinid_maxshape() - soma_context <- soma_context() - sjid_shape <- maybe_soma_joinid_shape(sdf$uri, soma_context) - sjid_maxshape <- 
maybe_soma_joinid_maxshape(sdf$uri, soma_context) - - if (has_soma_joinid_dim) { - # More testing to come on - # https://github.com/single-cell-data/TileDB-SOMA/issues/2407 - expect_false(is.na(sjid_shape)) - expect_false(is.na(sjid_maxshape)) - } else { - expect_true(is.na(sjid_shape)) - expect_true(is.na(sjid_maxshape)) - } - - dom <- sdf$domain() - mxd <- sdf$maxdomain() - - # First check names - expect_equal(names(dom), index_column_names) - expect_equal(names(mxd), index_column_names) - - # Then check all slots are pairs - for (name in names(dom)) { - expect_length(dom[[name]], 2L) - expect_length(mxd[[name]], 2L) - } + domain_at_create_choices = list( + list(soma_joinid = c(0, 999)), + list(soma_joinid = c(0, 999), int_column = c(-10000, 10000)), + list(soma_joinid = c(0, 999), string_column = NULL), + list(string_column = NULL, int_column = c(-10000, 10000)), + list(string_column = c("apple", "zebra"), int_column = c(-10000, 10000)) + ) - # Then check contents - if ("soma_joinid" %in% index_column_names) { - sjid_dom <- dom[["soma_joinid"]] - sjid_mxd <- mxd[["soma_joinid"]] - expect_equal(sjid_dom[[1]], 0) - expect_equal(sjid_mxd[[1]], 0) - # Really big number; exact value unimportant - # TODO: test create with specified domain on PR 3032 - # -- then, current and max domain will be different - expect_true(sjid_dom[[2]] > bit64::as.integer64(10000000000)) - expect_true(sjid_mxd[[2]] > bit64::as.integer64(10000000000)) - } + # Check the test configs themselves to make sure someone (ahem, me) + # didn't edit one and forget to edit the other + expect_equal(length(index_column_name_choices), length(domain_at_create_choices)) - if ("int_column" %in% index_column_names) { - int_dom <- dom[["int_column"]] - int_mxd <- mxd[["int_column"]] - expect_true(int_dom[[1]] < -2000000000) - expect_true(int_dom[[2]] > 2000000000) - } + for (i in seq_along(index_column_name_choices)) { + index_column_names <- index_column_name_choices[[i]] - if ("string_column" %in% 
index_column_names) { - expect_equal(dom[["string_column"]], c("", "")) - expect_equal(mxd[["string_column"]], c("", "")) + for (use_domain_at_create in c(FALSE, TRUE)) { + + uri <- withr::local_tempdir("soma-dataframe-shape") + + # Create + if (dir.exists(uri)) unlink(uri, recursive=TRUE) + + domain_for_create <- NULL + if (use_domain_at_create) { + domain_for_create <- domain_at_create_choices[[i]] + } + + sdf <- SOMADataFrameCreate( + uri, + asch, + index_column_names = index_column_names, + domain = domain_for_create) + + expect_true(sdf$exists()) + expect_true(dir.exists(uri)) + + # Write + tbl0 <- arrow::arrow_table(int_column = 1L:4L, + soma_joinid = 1L:4L, + float_column = 1.1:4.1, + string_column = c("apple", "ball", "cat", "dog"), + schema = asch) + + sdf$write(tbl0) + sdf$close() + + sdf <- SOMADataFrameOpen(uri) + + # Check shape and maxshape et al. + if (!.new_shape_feature_flag_is_enabled()) { + expect_false(sdf$tiledbsoma_has_upgraded_domain()) + } else { + expect_true(sdf$tiledbsoma_has_upgraded_domain()) + } + expect_error(sdf$shape(), class = "notYetImplementedError") + expect_error(sdf$maxshape(), class = "notYetImplementedError") + + # Not implemented this way per + # https://github.com/single-cell-data/TileDB-SOMA/pull/2953#discussion_r1746125089 + # sjid_shape <- sdf$.maybe_soma_joinid_shape() + # sjid_maxshape <- sdf$.maybe_soma_joinid_maxshape() + soma_context <- soma_context() + sjid_shape <- maybe_soma_joinid_shape(sdf$uri, soma_context) + sjid_maxshape <- maybe_soma_joinid_maxshape(sdf$uri, soma_context) + + if ("soma_joinid" %in% index_column_names) { + # More testing to come on + # https://github.com/single-cell-data/TileDB-SOMA/issues/2407 + expect_false(rlang::is_na(sjid_shape)) + expect_false(rlang::is_na(sjid_maxshape)) + } else { + expect_true(rlang::is_na(sjid_shape)) + expect_true(rlang::is_na(sjid_maxshape)) + } + + # Check has_upgraded_domain + if (!.new_shape_feature_flag_is_enabled()) { + 
expect_false(sdf$tiledbsoma_has_upgraded_domain()) + } else { + expect_true(sdf$tiledbsoma_has_upgraded_domain()) + } + + # Check domain and maxdomain + dom <- sdf$domain() + mxd <- sdf$maxdomain() + + # First check names + expect_equal(names(dom), index_column_names) + expect_equal(names(mxd), index_column_names) + + # Then check all slots are pairs + for (name in names(dom)) { + expect_length(dom[[name]], 2L) + expect_length(mxd[[name]], 2L) + } + + # Then check contents + + # Old shape/domainishes (without core current domain) for non-string dims: + # * There is no core current domain + # * Expect domain == maxdomain + # * If they asked for NULL: both should be huge (near min/max for datatype) + # * If they asked for something specific: they should get it + # + # New shape/domainishes (with core current domain) for non-string dims: + # * Maxdomain should be huge (near min/max for datatype) + # * If they asked for NULL: domain should be the same as maxdomain + # * If they asked for a specific domain: they should get it + # + # Old shape/domainishes (without core current domain) for string dims: + # * There is no core current domain + # * Expect domain == maxdomain + # * Core domain for strings is always ("", "") + # + # New shape/domainishes (with core current domain) for string dims: + # * Core domain (soma maxdomain) for strings is always ("", "") + # * Core current domain (soma domain) for strings: + # o If they asked for NULL: expect ("", "") + # o If they asked for something specific: they should get it + + if ("soma_joinid" %in% index_column_names) { + sjid_dom <- dom[["soma_joinid"]] + sjid_mxd <- mxd[["soma_joinid"]] + sjid_dfc <- domain_for_create[["soma_joinid"]] + + if (!.new_shape_feature_flag_is_enabled()) { + # Old behavior + expect_equal(sjid_dom, sjid_mxd) + } + + if (!use_domain_at_create) { + expect_equal(sjid_dom[[1]], 0) + expect_equal(sjid_mxd[[1]], 0) + # This is a really big number in the ballpark of 2**63; its exact + # value is 
unimportant. + expect_true(sjid_dom[[2]] > bit64::as.integer64(10000000000)) + expect_true(sjid_mxd[[2]] > bit64::as.integer64(10000000000)) + } else { + # Not: expect_equal(sjid_dom, bit64::as.integer64(sjid_dfc)) The + # soma_joinid dim is always of type int64. Everything coming back + # from libtiledbsoma, through C nanoarrow, through the R arrow + # package, to Arrow RecordBatch, holds true to that. But the final + # as.list() converts the domain to regular integer. This is a feature + # TBH: suppressable with `op <- options(arrow.int64_downcast = + # FALSE)`. The maxdomain is likely to be in the 2**63 range + # but the domain is likely to be ordinary-sized numbers in the + # thousands or millions. Users are likely to prefer these + # being downcast to regular R integers. + expect_equal(sjid_dom, sjid_dfc) + } + } + + if ("int_column" %in% index_column_names) { + int_dom <- dom[["int_column"]] + int_mxd <- mxd[["int_column"]] + int_dfc <- domain_for_create[["int_column"]] + + if (!.new_shape_feature_flag_is_enabled()) { + # Old behavior + expect_equal(int_dom, int_mxd) + } + + if (!use_domain_at_create) { + expect_true(int_dom[[1]] < -2000000000) + expect_true(int_dom[[2]] > 2000000000) + } else { + expect_equal(int_dom, int_dfc) + } + + if (!.new_shape_feature_flag_is_enabled()) { + if (!use_domain_at_create) { + expect_true(int_mxd[[1]] < -2000000000) + expect_true(int_mxd[[2]] > 2000000000) + } else { + expect_equal(int_mxd, int_dfc) + } + } else { + expect_true(int_mxd[[1]] < -2000000000) + expect_true(int_mxd[[2]] > 2000000000) + } + } + + if ("string_column" %in% index_column_names) { + str_dom <- dom[["string_column"]] + str_mxd <- mxd[["string_column"]] + str_dfc <- domain_for_create[["string_column"]] + + if (!.new_shape_feature_flag_is_enabled()) { + expect_equal(str_dom, c("", "")) + expect_equal(str_mxd, c("", "")) + + } else { + if (!use_domain_at_create) { + expect_equal(str_dom, c("", "")) + } else { + if (is.null(str_dfc)) { + 
expect_equal(str_dom, c("", "")) + } else { + expect_equal(str_dom, str_dfc) + } + } + expect_equal(str_mxd, c("", "")) + } + } + + sdf$close() + + # Test resize for dataframes (more general upgrade_domain to be tested + # separately -- see https://github.com/single-cell-data/TileDB-SOMA/issues/2407) + if (.new_shape_feature_flag_is_enabled() && use_domain_at_create) { + has_soma_joinid_dim <- "soma_joinid" %in% index_column_names + sjid_dfc <- domain_for_create[["soma_joinid"]] + + # Test resize down + new_shape <- 0 + sdf <- SOMADataFrameOpen(uri, "WRITE") + if (has_soma_joinid_dim) { + # It's an error to downsize + expect_error(sdf$resize_soma_joinid(new_shape)) + } else { + # There is no problem when soma_joinid is not a dim -- + # sdf$resize_soma_joinid is a no-op in that case + expect_no_condition(sdf$resize_soma_joinid(new_shape)) + } + sdf$close() + + # Make sure the failed resize really didn't change the shape + if (has_soma_joinid_dim) { + sdf <- SOMADataFrameOpen(uri, "READ") + expect_equal(sdf$domain()[["soma_joinid"]], sjid_dfc) + sdf$close() + } + + # Test writes out of bounds, before resize + old_shape <- 100 + if (has_soma_joinid_dim) { + old_shape <- domain_for_create[["soma_joinid"]][[2]] + 1 + 100 + } + new_shape <- old_shape + 100 + + tbl1 <- arrow::arrow_table( + int_column = 5L:8L, + soma_joinid = (old_shape+1L):(old_shape+4L), + float_column = 5.1:8.1, + string_column = c("egg", "flag", "geese", "hay"), + schema = asch) + + sdf <- SOMADataFrameOpen(uri, "WRITE") + if (has_soma_joinid_dim) { + expect_error(sdf$write(tbl1)) + } else { + expect_no_condition(sdf$write(tbl1)) + } + sdf$close() + + # Test resize + sdf <- SOMADataFrameOpen(uri, "WRITE") + sdf$resize_soma_joinid(new_shape) + sdf$close(); + + # Test writes out of old bounds, within new bounds, after resize + sdf <- SOMADataFrameOpen(uri, "WRITE") + expect_no_condition(sdf$write(tbl1)) + sdf$close(); + + # To do: test readback + + rm(tbl1) + } + + rm(sdf, tbl0) + + gc() } - - 
sdf$close() - - rm(sdf, tbl0) - - gc() } + + # Test `domain` assertions + uri <- tempfile() + + # `domain` must be `NULL` or a list + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = NA + )) + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = 1L + )) + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = bit64::as.integer64(c(0L, 99L)) + )) + # `domain` may not be an empty list + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = list() + )) + # `domain` must be named + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = list(bit64::as.integer64(c(0L, 99L))) + )) + # `domain` must be a list of two-length atomics + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = list(soma_joinid = list(bit64::as.integer64(0L), bit64::as.integer64(99L))) + )) + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = list(soma_joinid = NA) + )) + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = list(soma_joinid = numeric()) + )) + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = list(soma_joinid = numeric(length = 3L)) + )) + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = c("soma_joinid", "int_column"), + domain = list(soma_joinid = NULL, int_column = data.frame()) + )) + # `names(domain)` must be identical to `index_column_names` + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + index_column_names = "soma_joinid", + domain = list(soma_joinid = NULL, int_column = NULL) + )) + expect_error(SOMADataFrameCreate( + uri, + schema = asch, + 
index_column_names = c("soma_joinid", "int_column"), + domain = list(soma_joinid = NULL) + )) }) test_that("SOMASparseNDArray shape", { diff --git a/apis/r/tests/testthat/test-write-soma-resume.R b/apis/r/tests/testthat/test-write-soma-resume.R index 4a2aaf517c..4e70dabea9 100644 --- a/apis/r/tests/testthat/test-write-soma-resume.R +++ b/apis/r/tests/testthat/test-write-soma-resume.R @@ -164,6 +164,11 @@ test_that("Resume-mode data frames", { } } + if (.new_shape_feature_flag_is_enabled()) { + sdfp$reopen("WRITE") + sdfp$resize_soma_joinid(nrow(co2)) + } + expect_s3_class( sdfc <- write_soma( co2, diff --git a/apis/r/tools/get_tarball.R b/apis/r/tools/get_tarball.R index 96c3a97d1d..a9dba686aa 100644 --- a/apis/r/tools/get_tarball.R +++ b/apis/r/tools/get_tarball.R @@ -14,14 +14,14 @@ isLinux <- Sys.info()["sysname"] == "Linux" if (isMac) { arch <- system('uname -m', intern = TRUE) if (arch == "x86_64") { - url <- "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-macos-x86_64-2.26.1-db1cee4.tar.gz" + url <- "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-macos-x86_64-2.26.2-30fc114.tar.gz" } else if (arch == "arm64") { - url <- "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-macos-arm64-2.26.1-db1cee4.tar.gz" + url <- "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-macos-arm64-2.26.2-30fc114.tar.gz" } else { stop("Unsupported Mac architecture. Please have TileDB Core installed locally.") } } else if (isLinux) { - url <- "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-linux-x86_64-2.26.1-db1cee4.tar.gz" + url <- "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-linux-x86_64-2.26.2-30fc114.tar.gz" } else { message("Unsupported platform for downloading artifacts. 
Please have TileDB Core installed locally.") q(save = "no", status = 1) diff --git a/doc/requirements_doc.txt b/doc/requirements_doc.txt index e49a2c91a3..18f27db1d4 100644 --- a/doc/requirements_doc.txt +++ b/doc/requirements_doc.txt @@ -6,7 +6,7 @@ jinja2==3.1.4 nbsphinx==0.9.3 pandoc==2.3 pybind11==2.12.0 -setuptools==70.0.0 +setuptools==75.1.0 setuptools-scm==8.1.0 sphinx==7.3.7 sphinx-rtd-theme==2.0.0 diff --git a/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake b/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake index d6f7487506..75030c967c 100644 --- a/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake +++ b/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake @@ -58,8 +58,8 @@ else() # NB When updating the pinned URLs here, please also update in file apis/r/tools/get_tarball.R if(DOWNLOAD_TILEDB_PREBUILT) if (WIN32) # Windows - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-windows-x86_64-2.26.1-db1cee4.zip") - SET(DOWNLOAD_SHA1 "b5b909511adf4761d546a865839ca92d0ea4f780") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-windows-x86_64-2.26.2-30fc114.zip") + SET(DOWNLOAD_SHA1 "43f1cf15c268b8a1367699f84b1138236b9b3e07") elseif(APPLE) # OSX # Status quo as of 2023-05-18: @@ -76,22 +76,22 @@ else() # o CMAKE_SYSTEM_PROCESSOR is x86_64 if (CMAKE_OSX_ARCHITECTURES STREQUAL x86_64) - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-macos-x86_64-2.26.1-db1cee4.tar.gz") - SET(DOWNLOAD_SHA1 "ca9b3e350e3548120b3debd20eaf440b4d8f2c65") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-macos-x86_64-2.26.2-30fc114.tar.gz") + SET(DOWNLOAD_SHA1 "6af72d48812792ef815634bb544ef0e332493bf2") elseif (CMAKE_OSX_ARCHITECTURES STREQUAL arm64) - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-macos-arm64-2.26.1-db1cee4.tar.gz") - SET(DOWNLOAD_SHA1 "0fb117949b7d16213d8b1606ffddf4693745012d") + 
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-macos-arm64-2.26.2-30fc114.tar.gz") + SET(DOWNLOAD_SHA1 "f9bc0696cfeab3c87dc0a190109725320fdfcfdf") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-macos-x86_64-2.26.1-db1cee4.tar.gz") - SET(DOWNLOAD_SHA1 "ca9b3e350e3548120b3debd20eaf440b4d8f2c65") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-macos-x86_64-2.26.2-30fc114.tar.gz") + SET(DOWNLOAD_SHA1 "6af72d48812792ef815634bb544ef0e332493bf2") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-macos-arm64-2.26.1-db1cee4.tar.gz") - SET(DOWNLOAD_SHA1 "0fb117949b7d16213d8b1606ffddf4693745012d") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-macos-arm64-2.26.2-30fc114.tar.gz") + SET(DOWNLOAD_SHA1 "f9bc0696cfeab3c87dc0a190109725320fdfcfdf") endif() else() # Linux - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.1/tiledb-linux-x86_64-2.26.1-db1cee4.tar.gz") - SET(DOWNLOAD_SHA1 "e65081a0505733973b106b761f221748a9823474") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.26.2/tiledb-linux-x86_64-2.26.2-30fc114.tar.gz") + SET(DOWNLOAD_SHA1 "2c2257dedb8fa6376b0fb6dc9810b6a3b72d0dbb") endif() ExternalProject_Add(ep_tiledb @@ -113,8 +113,8 @@ else() else() # Build from source ExternalProject_Add(ep_tiledb PREFIX "externals" - URL "https://github.com/TileDB-Inc/TileDB/archive/2.26.1.zip" - URL_HASH SHA1=2cd7ec43412698dcf63b314fd04d2aa2dd5a6f23 + URL "https://github.com/TileDB-Inc/TileDB/archive/2.26.2.zip" + URL_HASH SHA1=2195ce24487d47a3be0455d482aff05311718f10 DOWNLOAD_NAME "tiledb.zip" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EP_INSTALL_PREFIX} diff --git 
a/libtiledbsoma/src/CMakeLists.txt b/libtiledbsoma/src/CMakeLists.txt index 89465b79ae..d853b878db 100644 --- a/libtiledbsoma/src/CMakeLists.txt +++ b/libtiledbsoma/src/CMakeLists.txt @@ -33,7 +33,7 @@ add_library(TILEDB_SOMA_OBJECTS OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_experiment.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_measurement.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_scene.cc - ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_point_cloud.cc + ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_point_cloud_dataframe.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_multiscale_image.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_context.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_dataframe.cc @@ -135,7 +135,7 @@ endif() # ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_experiment.h # ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_measurement.h # ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_scene.h -# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_point_cloud.h +# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_point_cloud_dataframe.h # ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_multiscale_image.h # ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_object.h # ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_sparse_ndarray.h @@ -158,7 +158,7 @@ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_experiment.h ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_measurement.h ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_scene.h - ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_point_cloud.h + ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_point_cloud_dataframe.h ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_multiscale_image.h ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_object.h DESTINATION "include/tiledbsoma/soma" diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index d341a30e19..e59c5bc8b3 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -1012,6 +1012,16 @@ std::vector SOMAArray::dimension_names() const { return result; } +bool SOMAArray::has_dimension_name(const std::string& name) const { + auto dimensions = 
tiledb_schema()->domain().dimensions(); + for (const auto& dim : dimensions) { + if (dim.name() == name) { + return true; + } + } + return false; +} + void SOMAArray::write(bool sort_coords) { if (mq_->query_type() != TILEDB_WRITE) { throw TileDBSOMAError("[SOMAArray] array must be opened in write mode"); @@ -1418,6 +1428,192 @@ std::vector SOMAArray::maxshape() { return _tiledb_domain(); } +// This is a helper for can_upgrade_domain and can_resize, which have +// much overlap. +std::pair SOMAArray::_can_set_shape_helper( + const std::vector& newshape, + bool is_resize, + std::string method_name_for_messages) { + // E.g. it's an error to try to upgrade_domain or resize specifying + // a 3-D shape on a 2-D array. + auto arg_ndim = newshape.size(); + auto array_ndim = arr_->schema().domain().ndim(); + if (array_ndim != arg_ndim) { + return std::pair( + false, + fmt::format( + "cannot {}: provided shape has ndim {}, while the array has {}", + method_name_for_messages, + arg_ndim, + array_ndim)); + } + + // Enforce the semantics that tiledbsoma_upgrade_domain must be called + // only on arrays that don't have a shape set, and resize must be called + // only on arrays that do. + bool has_shape = has_current_domain(); + if (is_resize) { + // They're trying to do resize on an array that doesn't already have a + // shape. + if (!has_shape) { + return std::pair( + false, + fmt::format( + "{}: array currently has no shape: please use " + "tiledbsoma_upgrade_shape.", + method_name_for_messages)); + } + } else { + // They're trying to do upgrade_shape on an array that already has a + // shape. + if (has_shape) { + return std::pair( + false, + fmt::format( + "{}: array already has a shape: please use resize rather " + "than " + "tiledbsoma_upgrade_shape.", + method_name_for_messages)); + } + } + + // * For old-style arrays without shape: core domain (soma maxdomain) may be + // small (like 100) or big (like 2 billionish). 
+ // * For new-style arrays with shape: core current domain (soma domain) will + // probably be small and core domain (soma maxdomain) will be huge. + // + // In either case, we need to check that the user's requested shape isn't + // outside the core domain, which is immutable. For old-style arrays, + // + // if the requested shape fits in the array's core domain, it's good to go + // as a new shape. + auto domain_check = _can_set_shape_domainish_helper( + newshape, false, method_name_for_messages); + if (!domain_check.first) { + return domain_check; + } + + // For new-style arrays, we need to additionally check that the requested + // shape (core current domain) isn't a downsize of the current one. + if (has_shape) { + auto current_domain_check = _can_set_shape_domainish_helper( + newshape, true, method_name_for_messages); + if (!current_domain_check.first) { + return current_domain_check; + } + } + + return std::pair(true, ""); +} + +// This is a helper for _can_set_shape_helper: it's used for comparing +// the user's requested shape against the core current domain or core (max) +// domain. +std::pair SOMAArray::_can_set_shape_domainish_helper( + const std::vector& newshape, + bool check_current_domain, + std::string method_name_for_messages) { + Domain domain = arr_->schema().domain(); + + for (unsigned i = 0; i < domain.ndim(); i++) { + const auto& dim = domain.dimension(i); + + const std::string& dim_name = dim.name(); + + // These methods are only for SOMA NDArrays, and any other arrays for + // which the indices are entirely int64. SOMA DataFrame objects, with + // multi-type dims, need to go through upgrade_domain -- and this is + // library-internal code, it's not the user's fault if we got here. 
+ if (dim.type() != TILEDB_INT64) { + throw TileDBSOMAError(fmt::format( + "{}: internal error: expected {} dim to " + "be {}; got {}", + method_name_for_messages, + dim_name, + tiledb::impl::type_to_str(TILEDB_INT64), + tiledb::impl::type_to_str(dim.type()))); + } + + if (check_current_domain) { + std::pair + cap = _core_current_domain_slot(dim_name); + int64_t old_dim_shape = cap.second + 1; + + if (newshape[i] < old_dim_shape) { + return std::pair( + false, + fmt::format( + "cannot {} for {}: new {} < existing shape {}", + method_name_for_messages, + dim_name, + newshape[i], + old_dim_shape)); + } + + } else { + std::pair cap = _core_domain_slot( + dim_name); + int64_t old_dim_shape = cap.second + 1; + + if (newshape[i] > old_dim_shape) { + return std::pair( + false, + fmt::format( + "cannot {} for {}: new {} < maxshape {}", + method_name_for_messages, + dim_name, + newshape[i], + old_dim_shape)); + } + } + } + return std::pair(true, ""); +} + +std::pair SOMAArray::can_resize_soma_joinid( + int64_t newshape) { + // Fail if the array doesn't already have a shape yet (they should upgrade + // first). + if (!has_current_domain()) { + return std::pair( + false, + "can_resize_soma_joinid: dataframe currently has no domain set: " + "please use tiledbsoma_upgrade_domain."); + } + + // OK if soma_joinid isn't a dim. + if (!has_dimension_name("soma_joinid")) { + return std::pair(true, ""); + } + + // Fail if the newshape isn't within the array's core current domain. + std::pair cur_dom_lo_hi = _core_current_domain_slot("soma_joinid"); + if (newshape < cur_dom_lo_hi.second) { + return std::pair( + false, + fmt::format( + "cannot resize_soma_joinid: new soma_joinid shape {} < " + "existing shape {}", + newshape, + cur_dom_lo_hi.second)); + } + + // Fail if the newshape isn't within the array's core (max) domain. 
+ std::pair dom_lo_hi = _core_domain_slot("soma_joinid"); + if (newshape > dom_lo_hi.second) { + return std::pair( + false, + fmt::format( + "cannot resize_soma_joinid: new soma_joinid shape {} > " + "maxshape {}", + newshape, + dom_lo_hi.second)); + } + + // Success otherwise. + return std::pair(true, ""); +} + void SOMAArray::resize(const std::vector& newshape) { if (_get_current_domain().is_empty()) { throw TileDBSOMAError( @@ -1471,7 +1667,7 @@ void SOMAArray::_set_current_domain_from_shape( schema_evolution.array_evolve(uri_); } -void SOMAArray::maybe_resize_soma_joinid(const std::vector& newshape) { +void SOMAArray::resize_soma_joinid(int64_t newshape) { if (mq_->query_type() != TILEDB_WRITE) { throw TileDBSOMAError( "[SOMAArray::resize] array must be opened in write mode"); @@ -1480,12 +1676,6 @@ void SOMAArray::maybe_resize_soma_joinid(const std::vector& newshape) { ArraySchema schema = arr_->schema(); Domain domain = schema.domain(); unsigned ndim = domain.ndim(); - if (newshape.size() != 1) { - throw TileDBSOMAError(fmt::format( - "[SOMAArray::resize]: newshape has dimension count {}; needed 1", - newshape.size(), - ndim)); - } auto tctx = ctx_->tiledb_ctx(); CurrentDomain old_current_domain = ArraySchemaExperimental::current_domain( @@ -1498,7 +1688,7 @@ void SOMAArray::maybe_resize_soma_joinid(const std::vector& newshape) { for (unsigned i = 0; i < ndim; i++) { if (domain.dimension(i).name() == "soma_joinid") { ndrect.set_range( - domain.dimension(i).name(), 0, newshape[0] - 1); + domain.dimension(i).name(), 0, newshape - 1); } } diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 5af4d0422a..43c17b18e4 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -290,6 +290,13 @@ class SOMAArray : public SOMAObject { */ std::vector dimension_names() const; + /** + * @brief Sees if the array has a dimension of the given name. 
+ * + * @return bool + */ + bool has_dimension_name(const std::string& name) const; + /** * @brief Set the dimension slice using one point * @@ -1031,6 +1038,55 @@ class SOMAArray : public SOMAObject { */ std::vector maxshape(); + /** + * This wires up to Python/R to tell a user if they can call resize() on an + * array without error. For single arrays, they could just call resize() and + * take their chances -- but for experiment-level resize (e.g. append mode) + * it's crucial that we provide a can-we-do-them-all pass through all arrays + * in the experiment before attempting any of them. + * + * On failure, returns false and an error string suitable for showing + * to the user; on success, returns true and the empty string. + * + * Failure reasons: the requested shape's dimension-count doesn't match the + * arrays; the array doesn't have a shape set (they must call + * upgrade_shape), or the requested shape doesn't fit within the array's + * existing core domain. + */ + std::pair can_resize( + const std::vector& newshape) { + return _can_set_shape_helper(newshape, true, "resize"); + } + + /** + * This wires up to Python/R to tell a user if they can call + * upgrade_shape() on an array without error. For single dataframes, + * they could just call upgrade_shape() and take their chances -- but for + * experiment-level resize (e.g. append mode) it's crucial that we provide a + * can-we-do-them-all pass through all arrays in the experiment before + * attempting any of them. + * + * On failure, returns false and an error string suitable for showing + * to the user; on success, returns true and the empty string. + * + * Failure reasons: the requested shape's dimension-count doesn't match the + * arrays; the array already has a shape set (they must call resize), the + * requested shape doesn't fit within the array's existing core domain, or + * the requested shape is a downsize of the array's existing core current + * domain. 
+ */ + std::pair can_upgrade_shape( + const std::vector& newshape) { + return _can_set_shape_helper( + newshape, false, "tiledbsoma_upgrade_shape"); + } + + /** + * This is similar to can_upgrade_shape, but it's a can-we call + * for resize_soma_joinid. + */ + std::pair can_resize_soma_joinid(int64_t newshape); + /** * @brief Resize the shape (what core calls "current domain") up to the * maxshape (what core calls "domain"). @@ -1066,7 +1122,7 @@ class SOMAArray : public SOMAObject { * @return Throws if the requested shape exceeds the array's create-time * maxshape. Throws if the array does not have current-domain support. */ - void maybe_resize_soma_joinid(const std::vector& newshape); + void resize_soma_joinid(int64_t newshape); protected: // These two are for use nominally by SOMADataFrame. This could be moved in @@ -1125,7 +1181,23 @@ class SOMAArray : public SOMAObject { } /** - * Helper method for resize and upgrade_shape. + * This is a code-dedupe helper for can_resize and can_upgrade_shape. + */ + std::pair _can_set_shape_helper( + const std::vector& newshape, + bool is_resize, + std::string method_name_for_messages); + + /** + * This is a second-level code-dedupe helper for _can_set_shape_helper. + */ + std::pair _can_set_shape_domainish_helper( + const std::vector& newshape, + bool check_current_domain, + std::string method_name_for_messages); + + /** + * This is a code-dedupe helper method for resize and upgrade_shape. 
*/ void _set_current_domain_from_shape(const std::vector& newshape); diff --git a/libtiledbsoma/src/soma/soma_object.cc b/libtiledbsoma/src/soma/soma_object.cc index 646479bc2e..c610ece465 100644 --- a/libtiledbsoma/src/soma/soma_object.cc +++ b/libtiledbsoma/src/soma/soma_object.cc @@ -9,7 +9,7 @@ #include "soma_experiment.h" #include "soma_measurement.h" #include "soma_multiscale_image.h" -#include "soma_point_cloud.h" +#include "soma_point_cloud_dataframe.h" #include "soma_scene.h" #include "soma_sparse_ndarray.h" @@ -58,8 +58,11 @@ std::unique_ptr SOMAObject::open( return std::make_unique(*array_); } else if (array_type == "somadensendarray") { return std::make_unique(*array_); - } else if (array_type == "somapointcloud") { - return std::make_unique(*array_); + } else if (array_type == "somapointclouddataframe") { + return std::make_unique(*array_); + } else if (array_type == "somageometrydataframe") { + throw TileDBSOMAError( + "Support for SOMAGeometryDataFrame is not yet implemented"); } else { throw TileDBSOMAError("Saw invalid SOMAArray type"); } diff --git a/libtiledbsoma/src/soma/soma_point_cloud.cc b/libtiledbsoma/src/soma/soma_point_cloud_dataframe.cc similarity index 77% rename from libtiledbsoma/src/soma/soma_point_cloud.cc rename to libtiledbsoma/src/soma/soma_point_cloud_dataframe.cc index 4b9c429e5e..a7984712d7 100644 --- a/libtiledbsoma/src/soma/soma_point_cloud.cc +++ b/libtiledbsoma/src/soma/soma_point_cloud_dataframe.cc @@ -1,11 +1,11 @@ /** - * @file soma_point_cloud.cc + * @file soma_point_cloud_dataframe.cc * * @section LICENSE * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. + * @copyright Copyright (c) 2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -27,10 +27,10 @@ * * @section DESCRIPTION * - * This file defines the SOMAPointCloud class. 
+ * This file defines the SOMAPointCloudDataFrame class. */ -#include "soma_point_cloud.h" +#include "soma_point_cloud_dataframe.h" namespace tiledbsoma { using namespace tiledb; @@ -39,7 +39,7 @@ using namespace tiledb; //= public static //=================================================================== -void SOMAPointCloud::create( +void SOMAPointCloudDataFrame::create( std::string_view uri, std::unique_ptr schema, ArrowTable index_columns, @@ -51,28 +51,29 @@ void SOMAPointCloud::create( std::move(schema), ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), - "SOMAPointCloud", + "SOMAPointCloudDataFrame", true, platform_config); - SOMAArray::create(ctx, uri, tiledb_schema, "SOMAPointCloud", timestamp); + SOMAArray::create( + ctx, uri, tiledb_schema, "SOMAPointCloudDataFrame", timestamp); } -std::unique_ptr SOMAPointCloud::open( +std::unique_ptr SOMAPointCloudDataFrame::open( std::string_view uri, OpenMode mode, std::shared_ptr ctx, std::vector column_names, ResultOrder result_order, std::optional timestamp) { - return std::make_unique( + return std::make_unique( mode, uri, ctx, column_names, result_order, timestamp); } -bool SOMAPointCloud::exists( +bool SOMAPointCloudDataFrame::exists( std::string_view uri, std::shared_ptr ctx) { try { auto obj = SOMAObject::open(uri, OpenMode::read, ctx); - return "SOMAPointCloud" == obj->type(); + return "SOMAPointCloudDataFrame" == obj->type(); } catch (TileDBSOMAError& e) { return false; } @@ -82,15 +83,16 @@ bool SOMAPointCloud::exists( //= public non-static //=================================================================== -std::unique_ptr SOMAPointCloud::schema() const { +std::unique_ptr SOMAPointCloudDataFrame::schema() const { return this->arrow_schema(); } -const std::vector SOMAPointCloud::index_column_names() const { +const std::vector SOMAPointCloudDataFrame::index_column_names() + const { return this->dimension_names(); } -uint64_t SOMAPointCloud::count() { +uint64_t 
SOMAPointCloudDataFrame::count() { return this->nnz(); } diff --git a/libtiledbsoma/src/soma/soma_point_cloud.h b/libtiledbsoma/src/soma/soma_point_cloud_dataframe.h similarity index 80% rename from libtiledbsoma/src/soma/soma_point_cloud.h rename to libtiledbsoma/src/soma/soma_point_cloud_dataframe.h index f27a92afa3..8fc92dba07 100644 --- a/libtiledbsoma/src/soma/soma_point_cloud.h +++ b/libtiledbsoma/src/soma/soma_point_cloud_dataframe.h @@ -1,11 +1,11 @@ /** - * @file soma_point_cloud.h + * @file soma_point_cloud_dataframe.h * * @section LICENSE * * The MIT License * - * @copyright Copyright (c) 2023-2024 TileDB, Inc. + * @copyright Copyright (c) 2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -27,11 +27,11 @@ * * @section DESCRIPTION * - * This file defines the SOMAPointCloud class. + * This file defines the SOMAPointCloudDataFrame class. */ -#ifndef SOMA_POINT_CLOUD -#define SOMA_POINT_CLOUD +#ifndef SOMA_POINT_CLOUD_DATAFRAME +#define SOMA_POINT_CLOUD_DATAFRAME #include @@ -43,16 +43,16 @@ class ArrayBuffers; using namespace tiledb; -class SOMAPointCloud : public SOMAArray { +class SOMAPointCloudDataFrame : public SOMAArray { public: //=================================================================== //= public static //=================================================================== /** - * @brief Create a SOMAPointCloud object at the given URI. + * @brief Create a SOMAPointCloudDataFrame object at the given URI. * - * @param uri URI to create the SOMAPointCloud + * @param uri URI to create the SOMAPointCloudDataFrame * @param schema Arrow schema * @param index_columns The index column names with associated domains * and tile extents per dimension @@ -69,9 +69,9 @@ class SOMAPointCloud : public SOMAArray { std::optional timestamp = std::nullopt); /** - * @brief Open and return a SOMAPointCloud object at the given URI. 
+ * @brief Open and return a SOMAPointCloudDataFrame object at the given URI. * - * @param uri URI to create the SOMAPointCloud + * @param uri URI to create the SOMAPointCloudDataFrame * @param mode read or write * @param ctx SOMAContext * @param column_names A list of column names to use as user-defined index @@ -81,9 +81,9 @@ class SOMAPointCloud : public SOMAArray { * colmajor * @param timestamp If specified, overrides the default timestamp used to * open this object. If unset, uses the timestamp provided by the context. - * @return std::unique_ptr SOMAPointCloud + * @return std::unique_ptr SOMAPointCloudDataFrame */ - static std::unique_ptr open( + static std::unique_ptr open( std::string_view uri, OpenMode mode, std::shared_ptr ctx, @@ -92,9 +92,9 @@ class SOMAPointCloud : public SOMAArray { std::optional timestamp = std::nullopt); /** - * @brief Check if the SOMAPointCloud exists at the URI. + * @brief Check if the SOMAPointCloudDataFrame exists at the URI. * - * @param uri URI to create the SOMAPointCloud + * @param uri URI to create the SOMAPointCloudDataFrame * @param ctx SOMAContext */ static bool exists(std::string_view uri, std::shared_ptr ctx); @@ -104,7 +104,7 @@ class SOMAPointCloud : public SOMAArray { //=================================================================== /** - * @brief Construct a new SOMAPointCloud object. + * @brief Construct a new SOMAPointCloudDataFrame object. 
* * @param mode read or write * @param uri URI of the array @@ -114,7 +114,7 @@ class SOMAPointCloud : public SOMAArray { * colmajor * @param timestamp Timestamp */ - SOMAPointCloud( + SOMAPointCloudDataFrame( OpenMode mode, std::string_view uri, std::shared_ptr ctx, @@ -132,14 +132,14 @@ class SOMAPointCloud : public SOMAArray { timestamp) { } - SOMAPointCloud(const SOMAArray& other) + SOMAPointCloudDataFrame(const SOMAArray& other) : SOMAArray(other) { } - SOMAPointCloud() = delete; - SOMAPointCloud(const SOMAPointCloud&) = default; - SOMAPointCloud(SOMAPointCloud&&) = delete; - ~SOMAPointCloud() = default; + SOMAPointCloudDataFrame() = delete; + SOMAPointCloudDataFrame(const SOMAPointCloudDataFrame&) = default; + SOMAPointCloudDataFrame(SOMAPointCloudDataFrame&&) = delete; + ~SOMAPointCloudDataFrame() = default; using SOMAArray::open; diff --git a/libtiledbsoma/src/soma/soma_scene.cc b/libtiledbsoma/src/soma/soma_scene.cc index de3c0f7442..a0cd5b77ac 100644 --- a/libtiledbsoma/src/soma/soma_scene.cc +++ b/libtiledbsoma/src/soma/soma_scene.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. + * @copyright Copyright (c) 2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/libtiledbsoma/src/soma/soma_scene.h b/libtiledbsoma/src/soma/soma_scene.h index 8cf5d7dfd1..6ac36df3c7 100644 --- a/libtiledbsoma/src/soma/soma_scene.h +++ b/libtiledbsoma/src/soma/soma_scene.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. + * @copyright Copyright (c) 2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/libtiledbsoma/src/tiledbsoma/tiledbsoma b/libtiledbsoma/src/tiledbsoma/tiledbsoma index d15ac4c9eb..707bd03d60 100644 --- a/libtiledbsoma/src/tiledbsoma/tiledbsoma +++ b/libtiledbsoma/src/tiledbsoma/tiledbsoma @@ -54,7 +54,7 @@ #include "soma/soma_experiment.h" #include "soma/soma_measurement.h" #include "soma/soma_scene.h" -#include "soma/soma_point_cloud.h" +#include "soma/soma_point_cloud_dataframe.h" #include "soma/soma_multiscale_image.h" #include "soma/soma_object.h" #include "soma/soma_dataframe.h" diff --git a/libtiledbsoma/src/utils/common.h b/libtiledbsoma/src/utils/common.h index 87b41d5cd0..29174e1b1f 100644 --- a/libtiledbsoma/src/utils/common.h +++ b/libtiledbsoma/src/utils/common.h @@ -41,7 +41,7 @@ namespace tiledbsoma { const std::string SOMA_OBJECT_TYPE_KEY = "soma_object_type"; const std::string ENCODING_VERSION_KEY = "soma_encoding_version"; -const std::string ENCODING_VERSION_VAL = "1"; +const std::string ENCODING_VERSION_VAL = "1.1.0"; using MetadataValue = std::tuple; enum MetadataInfo { dtype = 0, num, value }; diff --git a/libtiledbsoma/test/CMakeLists.txt b/libtiledbsoma/test/CMakeLists.txt index 33bd4178cc..96aab605d1 100644 --- a/libtiledbsoma/test/CMakeLists.txt +++ b/libtiledbsoma/test/CMakeLists.txt @@ -27,6 +27,9 @@ add_executable(unit_soma unit_soma_dense_ndarray.cc unit_soma_sparse_ndarray.cc unit_soma_collection.cc + unit_soma_scene.cc + unit_soma_point_cloud_dataframe.cc + unit_soma_multiscale_image.cc test_indexer.cc # TODO: uncomment when thread_pool is enabled # unit_thread_pool.cc diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index 33089f6ea9..eb298e51be 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -478,7 +478,7 @@ TEST_CASE_METHOD( 
soma_dataframe->close(); soma_dataframe = open(OpenMode::write); - soma_dataframe->maybe_resize_soma_joinid(std::vector({new_max})); + soma_dataframe->resize_soma_joinid(int64_t{new_max}); soma_dataframe->close(); soma_dataframe = open(OpenMode::write); @@ -588,7 +588,7 @@ TEST_CASE_METHOD( REQUIRE(soma_dataframe->nnz() == 4); // Resize - auto new_shape = std::vector({SOMA_JOINID_RESIZE_DIM_MAX + 1}); + auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; if (!use_current_domain) { // Domain is already set. The domain (not current domain but domain) @@ -599,7 +599,7 @@ TEST_CASE_METHOD( soma_dataframe = open(OpenMode::write); // Array not resizeable if it has not already been sized - REQUIRE_THROWS(soma_dataframe->maybe_resize_soma_joinid(new_shape)); + REQUIRE_THROWS(soma_dataframe->resize_soma_joinid(new_shape)); soma_dataframe->close(); } else { @@ -617,11 +617,11 @@ TEST_CASE_METHOD( soma_dataframe->close(); soma_dataframe = open(OpenMode::read); - REQUIRE_THROWS(soma_dataframe->maybe_resize_soma_joinid(new_shape)); + REQUIRE_THROWS(soma_dataframe->resize_soma_joinid(new_shape)); soma_dataframe->close(); soma_dataframe = open(OpenMode::write); - soma_dataframe->maybe_resize_soma_joinid(new_shape); + soma_dataframe->resize_soma_joinid(new_shape); soma_dataframe->close(); // Check shape after resize @@ -664,6 +664,28 @@ TEST_CASE_METHOD( REQUIRE(maxdom_sjid[1] > 2000000000); } + // Check can_resize_soma_joinid + std::pair check = soma_dataframe + ->can_resize_soma_joinid(1); + if (!use_current_domain) { + REQUIRE(check.first == false); + REQUIRE( + check.second == + "can_resize_soma_joinid: dataframe currently has no domain " + "set: please use tiledbsoma_upgrade_domain."); + } else { + // Must fail since this is too small. 
+ REQUIRE(check.first == false); + REQUIRE( + check.second == + "cannot resize_soma_joinid: new soma_joinid shape 1 < existing " + "shape 199"); + check = soma_dataframe->can_resize_soma_joinid( + SOMA_JOINID_RESIZE_DIM_MAX + 1); + REQUIRE(check.first == true); + REQUIRE(check.second == ""); + } + soma_dataframe->close(); } } @@ -784,7 +806,7 @@ TEST_CASE_METHOD( soma_dataframe->close(); // Resize - auto new_shape = std::vector({SOMA_JOINID_RESIZE_DIM_MAX + 1}); + auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; if (!use_current_domain) { // Domain is already set. The domain (not current domain but domain) @@ -795,7 +817,7 @@ TEST_CASE_METHOD( soma_dataframe = open(OpenMode::write); // Array not resizeable if it has not already been sized - REQUIRE_THROWS(soma_dataframe->maybe_resize_soma_joinid(new_shape)); + REQUIRE_THROWS(soma_dataframe->resize_soma_joinid(new_shape)); soma_dataframe->close(); } else { @@ -812,11 +834,11 @@ TEST_CASE_METHOD( soma_dataframe->close(); soma_dataframe = open(OpenMode::read); - REQUIRE_THROWS(soma_dataframe->maybe_resize_soma_joinid(new_shape)); + REQUIRE_THROWS(soma_dataframe->resize_soma_joinid(new_shape)); soma_dataframe->close(); soma_dataframe = open(OpenMode::write); - soma_dataframe->maybe_resize_soma_joinid(new_shape); + soma_dataframe->resize_soma_joinid(new_shape); soma_dataframe->close(); // Check shape after resize @@ -878,6 +900,28 @@ TEST_CASE_METHOD( REQUIRE(maxdom_u32[1] > 2000000000); } + // Check can_resize_soma_joinid + std::pair check = soma_dataframe + ->can_resize_soma_joinid(1); + if (!use_current_domain) { + REQUIRE(check.first == false); + REQUIRE( + check.second == + "can_resize_soma_joinid: dataframe currently has no domain " + "set: please use tiledbsoma_upgrade_domain."); + } else { + // Must fail since this is too small. 
+ REQUIRE(check.first == false); + REQUIRE( + check.second == + "cannot resize_soma_joinid: new soma_joinid shape 1 < existing " + "shape 199"); + check = soma_dataframe->can_resize_soma_joinid( + SOMA_JOINID_RESIZE_DIM_MAX + 1); + REQUIRE(check.first == true); + REQUIRE(check.second == ""); + } + soma_dataframe->close(); } } @@ -1016,7 +1060,7 @@ TEST_CASE_METHOD( soma_dataframe->close(); // Resize - auto new_shape = std::vector({SOMA_JOINID_RESIZE_DIM_MAX + 1}); + auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; if (!use_current_domain) { // Domain is already set. The domain (not current domain but domain) @@ -1027,7 +1071,7 @@ TEST_CASE_METHOD( soma_dataframe = open(OpenMode::write); // Array not resizeable if it has not already been sized - REQUIRE_THROWS(soma_dataframe->maybe_resize_soma_joinid(new_shape)); + REQUIRE_THROWS(soma_dataframe->resize_soma_joinid(new_shape)); soma_dataframe->close(); } else { @@ -1044,11 +1088,11 @@ TEST_CASE_METHOD( soma_dataframe->close(); soma_dataframe = open(OpenMode::read); - REQUIRE_THROWS(soma_dataframe->maybe_resize_soma_joinid(new_shape)); + REQUIRE_THROWS(soma_dataframe->resize_soma_joinid(new_shape)); soma_dataframe->close(); soma_dataframe = open(OpenMode::write); - soma_dataframe->maybe_resize_soma_joinid(new_shape); + soma_dataframe->resize_soma_joinid(new_shape); soma_dataframe->close(); // Check shape after resize @@ -1108,6 +1152,28 @@ TEST_CASE_METHOD( REQUIRE(ned_str == std::vector({"", ""})); + // Check can_resize_soma_joinid + std::pair check = soma_dataframe + ->can_resize_soma_joinid(1); + if (!use_current_domain) { + REQUIRE(check.first == false); + REQUIRE( + check.second == + "can_resize_soma_joinid: dataframe currently has no domain " + "set: please use tiledbsoma_upgrade_domain."); + } else { + // Must fail since this is too small. 
+ REQUIRE(check.first == false); + REQUIRE( + check.second == + "cannot resize_soma_joinid: new soma_joinid shape 1 < existing " + "shape 99"); + check = soma_dataframe->can_resize_soma_joinid( + SOMA_JOINID_RESIZE_DIM_MAX + 1); + REQUIRE(check.first == true); + REQUIRE(check.second == ""); + } + soma_dataframe->close(); } } @@ -1229,7 +1295,7 @@ TEST_CASE_METHOD( soma_dataframe->close(); // Resize - auto new_shape = std::vector({SOMA_JOINID_RESIZE_DIM_MAX + 1}); + auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; if (!use_current_domain) { // Domain is already set. The domain (not current domain but domain) @@ -1240,7 +1306,7 @@ TEST_CASE_METHOD( soma_dataframe = open(OpenMode::write); // Array not resizeable if it has not already been sized - REQUIRE_THROWS(soma_dataframe->maybe_resize_soma_joinid(new_shape)); + REQUIRE_THROWS(soma_dataframe->resize_soma_joinid(new_shape)); soma_dataframe->close(); } else { @@ -1252,11 +1318,11 @@ TEST_CASE_METHOD( soma_dataframe->close(); soma_dataframe = open(OpenMode::read); - REQUIRE_THROWS(soma_dataframe->maybe_resize_soma_joinid(new_shape)); + REQUIRE_THROWS(soma_dataframe->resize_soma_joinid(new_shape)); soma_dataframe->close(); soma_dataframe = open(OpenMode::write); - soma_dataframe->maybe_resize_soma_joinid(new_shape); + soma_dataframe->resize_soma_joinid(new_shape); soma_dataframe->close(); // Check shape after resize -- noting soma_joinid is not a dim here @@ -1299,6 +1365,21 @@ TEST_CASE_METHOD( REQUIRE(maxdom_str == std::vector({"", ""})); } + // Check can_resize_soma_joinid + std::pair check = soma_dataframe + ->can_resize_soma_joinid(0); + if (!use_current_domain) { + REQUIRE(check.first == false); + REQUIRE( + check.second == + "can_resize_soma_joinid: dataframe currently has no domain " + "set: please use tiledbsoma_upgrade_domain."); + } else { + // Must pass since soma_joinid isn't a dim in this case. 
+ REQUIRE(check.first == true); + REQUIRE(check.second == ""); + } + soma_dataframe->close(); } } diff --git a/libtiledbsoma/test/unit_soma_multiscale_image.cc b/libtiledbsoma/test/unit_soma_multiscale_image.cc new file mode 100644 index 0000000000..e32231ee6b --- /dev/null +++ b/libtiledbsoma/test/unit_soma_multiscale_image.cc @@ -0,0 +1,45 @@ +/** + * @file unit_soma_multiscale_image.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * @section DESCRIPTION + * + * This file manages unit tests for the SOMAMultiscaleImage class + */ +#include "common.h" + +TEST_CASE("SOMAMultiscaleImage: basic") { + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-multiscale-image-basic"; + + SOMAMultiscaleImage::create(uri, ctx, std::nullopt); + auto soma_image = SOMAMultiscaleImage::open( + uri, OpenMode::read, ctx, std::nullopt); + REQUIRE(soma_image->uri() == uri); + REQUIRE(soma_image->ctx() == ctx); + REQUIRE(soma_image->type() == "SOMAMultiscaleImage"); + soma_image->close(); +} diff --git a/libtiledbsoma/test/unit_soma_point_cloud_dataframe.cc b/libtiledbsoma/test/unit_soma_point_cloud_dataframe.cc new file mode 100644 index 0000000000..728be687fa --- /dev/null +++ b/libtiledbsoma/test/unit_soma_point_cloud_dataframe.cc @@ -0,0 +1,172 @@ +/** + * @file unit_soma_point_cloud_dataframe.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file manages unit tests for the SOMAPointCloudDataFrame class + */ + +#include "common.h" + +const int64_t SOMA_JOINID_DIM_MAX = 99; + +TEST_CASE("SOMAPointCloudDataFrame: basic", "[SOMAPointCloudDataFrame]") { + auto use_current_domain = GENERATE(false, true); + // TODO this could be formatted with fmt::format which is part of internal + // header spd/log/fmt/fmt.h and should not be used. In C++20, this can be + // replaced with std::format. + std::ostringstream section; + section << "- use_current_domain=" << use_current_domain; + SECTION(section.str()) { + auto ctx = std::make_shared(); + std::string uri{"mem://unit-test-point-cloud-basic"}; + PlatformConfig platform_config{}; + + std::vector dim_infos({ + helper::DimInfo( + {.name = "soma_joinid", + .tiledb_datatype = TILEDB_INT64, + .dim_max = SOMA_JOINID_DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A", + .use_current_domain = use_current_domain}), + helper::DimInfo( + {.name = "x", + .tiledb_datatype = TILEDB_UINT32, + .dim_max = 100, + .string_lo = "N/A", + .string_hi = "N/A", + .use_current_domain = use_current_domain}), + helper::DimInfo( + {.name = "y", + .tiledb_datatype = TILEDB_UINT32, + .dim_max = 100, + .string_lo = "N/A", + .string_hi = "N/A", + .use_current_domain = use_current_domain}), + }); + + std::vector attr_infos({helper::AttrInfo( + {.name = "radius", .tiledb_datatype = TILEDB_FLOAT64})}); + + // Check the point cloud doesn't exist yet. + REQUIRE(!SOMAPointCloudDataFrame::exists(uri, ctx)); + + // Create the point cloud. 
+ auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns( + dim_infos, attr_infos); + SOMAPointCloudDataFrame::create( + uri, + std::move(schema), + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + platform_config, + std::nullopt); + + // Check the point cloud exists and it cannot be read as a different + // object. + REQUIRE(SOMAPointCloudDataFrame::exists(uri, ctx)); + REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADataFrame::exists(uri, ctx)); + + auto soma_point_cloud = SOMAPointCloudDataFrame::open( + uri, + OpenMode::read, + ctx, + {}, // column_names, + ResultOrder::automatic, + std::nullopt); + REQUIRE(soma_point_cloud->uri() == uri); + REQUIRE(soma_point_cloud->ctx() == ctx); + REQUIRE(soma_point_cloud->type() == "SOMAPointCloudDataFrame"); + std::vector expected_index_column_names = { + dim_infos[0].name, dim_infos[1].name, dim_infos[2].name}; + REQUIRE( + soma_point_cloud->index_column_names() == + expected_index_column_names); + REQUIRE(soma_point_cloud->nnz() == 0); + soma_point_cloud->close(); + + // Create vectors of data for writing. + std::vector d0(10); + std::iota(d0.begin(), d0.end(), 0); + std::vector d1(10); + std::iota(d1.begin(), d1.end(), 1); + std::vector d2(10, 10); + std::iota(d2.begin(), d2.end(), 0.0); + std::vector a0(10, 1.0); + + // Write to point cloud. + soma_point_cloud = SOMAPointCloudDataFrame::open( + uri, + OpenMode::write, + ctx, + {}, // column_names + ResultOrder::automatic, + std::nullopt); + soma_point_cloud->set_column_data( + dim_infos[0].name, d0.size(), d0.data()); + soma_point_cloud->set_column_data( + dim_infos[1].name, d1.size(), d1.data()); + soma_point_cloud->set_column_data( + dim_infos[2].name, d2.size(), d2.data()); + soma_point_cloud->set_column_data( + attr_infos[0].name, a0.size(), a0.data()); + soma_point_cloud->write(); + soma_point_cloud->close(); + + // Read back the data. 
+ soma_point_cloud = SOMAPointCloudDataFrame::open( + uri, + OpenMode::read, + ctx, + {}, // column_names, + ResultOrder::automatic, + std::nullopt); + while (auto batch = soma_point_cloud->read_next()) { + auto arrbuf = batch.value(); + auto d0span = arrbuf->at(dim_infos[0].name)->data(); + auto d1span = arrbuf->at(dim_infos[1].name)->data(); + auto d2span = arrbuf->at(dim_infos[2].name)->data(); + auto a0span = arrbuf->at(attr_infos[0].name)->data(); + CHECK(d0 == std::vector(d0span.begin(), d0span.end())); + CHECK(d1 == std::vector(d1span.begin(), d1span.end())); + CHECK(d2 == std::vector(d2span.begin(), d2span.end())); + CHECK(a0 == std::vector(a0span.begin(), a0span.end())); + } + soma_point_cloud->close(); + + auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); + REQUIRE(soma_object->uri() == uri); + REQUIRE(soma_object->type() == "SOMAPointCloudDataFrame"); + soma_object->close(); + } +} diff --git a/libtiledbsoma/test/unit_soma_scene.cc b/libtiledbsoma/test/unit_soma_scene.cc new file mode 100644 index 0000000000..c047767983 --- /dev/null +++ b/libtiledbsoma/test/unit_soma_scene.cc @@ -0,0 +1,45 @@ +/** + * @file unit_soma_scene.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file manages unit tests for the SOMAScene class + */ + +#include "common.h" + +TEST_CASE("SOMAScene: basic") { + auto ctx = std::make_shared(); + std::string uri{"mem://unit-test-scene-basic"}; + + SOMAScene::create(uri, ctx, std::nullopt); + auto soma_scene = SOMAScene::open(uri, OpenMode::read, ctx, std::nullopt); + CHECK(soma_scene->uri() == uri); + CHECK(soma_scene->ctx() == ctx); + CHECK(soma_scene->type() == "SOMAScene"); + soma_scene->close(); +} diff --git a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc index bb6b2decc4..9315a8724b 100644 --- a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc @@ -340,3 +340,141 @@ TEST_CASE("SOMASparseNDArray: metadata", "[SOMASparseNDArray]") { REQUIRE(soma_sparse->metadata_num() == 2); } } +void breakme() { +} + +TEST_CASE( + "SOMASparseNDArray: can_tiledbsoma_upgrade_shape", "[SOMASparseNDArray]") { + int64_t dim_max = 999; + + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-sparse-ndarray-upgrade-shape"; + + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; + tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; + std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( + dim_tiledb_datatype); + std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( + attr_tiledb_datatype); + + std::vector dim_infos( + {{.name = 
dim_name, + .tiledb_datatype = dim_tiledb_datatype, + .dim_max = dim_max, + .string_lo = "N/A", + .string_hi = "N/A", + .use_current_domain = false}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + + SOMASparseNDArray::create( + uri, + attr_arrow_format, + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx); + + auto soma_sparse = SOMASparseNDArray::open(uri, OpenMode::write, ctx); + REQUIRE(soma_sparse->has_current_domain() == false); + + // For old-style arrays, from before the current-domain feature: + // * The shape specified at create becomes the core (max) domain + // o Recall that the core domain is immutable + // * There is no current domain set + // o A current domain can be applied to it, up to <= (max) domain + auto dom = soma_sparse->soma_domain_slot(dim_name); + auto mxd = soma_sparse->soma_maxdomain_slot(dim_name); + REQUIRE(dom == mxd); + REQUIRE(dom.first == 0); + REQUIRE(dom.second == dim_max); + + breakme(); + std::vector newshape_wrong_dims({dim_max, 12}); + std::vector newshape_too_big({dim_max + 10}); + std::vector newshape_good({40}); + + auto check = soma_sparse->can_upgrade_shape(newshape_wrong_dims); + REQUIRE(check.first == false); + REQUIRE( + check.second == + "cannot tiledbsoma_upgrade_shape: provided shape has ndim 2, while the " + "array has 1"); + + check = soma_sparse->can_upgrade_shape(newshape_too_big); + REQUIRE(check.first == false); + REQUIRE( + check.second == + "cannot tiledbsoma_upgrade_shape for soma_dim_0: new 1009 < maxshape " + "1000"); + + check = soma_sparse->can_upgrade_shape(newshape_good); + REQUIRE(check.first == true); + REQUIRE(check.second == ""); +} + +TEST_CASE("SOMASparseNDArray: can_resize", "[SOMASparseNDArray]") { + int64_t dim_max = 999; + + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-sparse-ndarray-resize"; + + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; + 
tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; + std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( + dim_tiledb_datatype); + std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( + attr_tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = dim_tiledb_datatype, + .dim_max = dim_max, + .string_lo = "N/A", + .string_hi = "N/A", + .use_current_domain = true}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + + SOMASparseNDArray::create( + uri, + attr_arrow_format, + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx); + + auto soma_sparse = SOMASparseNDArray::open(uri, OpenMode::write, ctx); + REQUIRE(soma_sparse->has_current_domain() == true); + + // For new-style arrays, with the current-domain feature: + // * The shape specified at create becomes the core current domain + // o Recall that the core current domain is mutable, up tp <= (max) domain + // * The core (max) domain is huge + // o Recall that the core max domain is immutable + auto dom = soma_sparse->soma_domain_slot(dim_name); + auto mxd = soma_sparse->soma_maxdomain_slot(dim_name); + REQUIRE(dom != mxd); + REQUIRE(dom.first == 0); + REQUIRE(dom.second == dim_max); + + std::vector newshape_wrong_dims({dim_max, 12}); + std::vector newshape_too_small({40}); + std::vector newshape_good({2000}); + + auto check = soma_sparse->can_resize(newshape_wrong_dims); + REQUIRE(check.first == false); + REQUIRE( + check.second == + "cannot resize: provided shape has ndim 2, while the array has 1"); + + check = soma_sparse->can_resize(newshape_too_small); + REQUIRE(check.first == false); + REQUIRE( + check.second == + "cannot resize for soma_dim_0: new 40 < existing shape 1000"); + + check = soma_sparse->can_resize(newshape_good); + REQUIRE(check.first == true); + REQUIRE(check.second == ""); +}