Skip to content

Commit

Permalink
Geometry dataframe implementation, use spatial axes when available wh…
Browse files Browse the repository at this point in the history
…en creating schema
  • Loading branch information
XanthosXanthopoulos committed Oct 21, 2024
1 parent 928c281 commit 02bfa69
Show file tree
Hide file tree
Showing 10 changed files with 753 additions and 66 deletions.
3 changes: 3 additions & 0 deletions libtiledbsoma/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ add_library(TILEDB_SOMA_OBJECTS OBJECT
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_experiment.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_measurement.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_scene.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_geometry_dataframe.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_point_cloud_dataframe.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_multiscale_image.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_context.cc
Expand Down Expand Up @@ -188,6 +189,7 @@ endif()
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_experiment.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_measurement.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_scene.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_geometry_dataframe.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_point_cloud_dataframe.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_multiscale_image.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_object.h
Expand All @@ -211,6 +213,7 @@ install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_experiment.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_measurement.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_scene.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_geometry_dataframe.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_point_cloud_dataframe.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_multiscale_image.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_object.h
Expand Down
125 changes: 125 additions & 0 deletions libtiledbsoma/src/soma/soma_geometry_dataframe.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
* @file soma_geometry_dataframe.cc
*
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @section DESCRIPTION
*
* This file defines the SOMAGeometryDataFrame class.
*/

#include "soma_geometry_dataframe.h"
#include "../utils/util.h"

#include <regex>
#include <unordered_set>

namespace tiledbsoma {
using namespace tiledb;

//===================================================================
//= public static
//===================================================================

/**
 * Create a SOMAGeometryDataFrame array at the given URI.
 *
 * The Arrow schema, the index columns and the spatial columns are handed to
 * ArrowAdapter::tiledb_schema_from_arrow_schema to build the TileDB schema,
 * which is then passed to SOMAArray::create together with the
 * "SOMAGeometryDataFrame" type tag.
 *
 * @param uri URI at which the array is created
 * @param schema Arrow schema (ownership is transferred)
 * @param index_columns Index columns; both halves of the table are moved from
 * @param spatial_columns Spatial columns; both halves of the table are moved
 * from
 * @param ctx SOMAContext providing the TileDB context
 * @param platform_config Platform configuration forwarded to the schema
 * builder
 * @param timestamp Optional timestamp range forwarded to SOMAArray::create
 */
void SOMAGeometryDataFrame::create(
    std::string_view uri,
    std::unique_ptr<ArrowSchema> schema,
    ArrowTable index_columns,
    ArrowTable spatial_columns,
    std::shared_ptr<SOMAContext> ctx,
    PlatformConfig platform_config,
    std::optional<TimestampRange> timestamp) {
    auto tiledb_schema = ArrowAdapter::tiledb_schema_from_arrow_schema(
        ctx->tiledb_ctx(),
        std::move(schema),
        ArrowTable(
            std::move(index_columns.first), std::move(index_columns.second)),
        "SOMAGeometryDataFrame",
        // NOTE(review): meaning of this flag is defined by ArrowAdapter and
        // is not visible from this file -- confirm before changing.
        true,
        platform_config,
        ArrowTable(
            std::move(spatial_columns.first),
            std::move(spatial_columns.second)));

    // The handle returned by SOMAArray::create is not needed here; it is
    // released as soon as the call completes.
    SOMAArray::create(
        ctx, uri, tiledb_schema, "SOMAGeometryDataFrame", timestamp);
}

/**
 * Open the SOMAGeometryDataFrame stored at `uri`.
 *
 * All arguments are forwarded unchanged to the SOMAGeometryDataFrame
 * constructor; the newly constructed object is returned to the caller.
 */
std::unique_ptr<SOMAGeometryDataFrame> SOMAGeometryDataFrame::open(
    std::string_view uri,
    OpenMode mode,
    std::shared_ptr<SOMAContext> ctx,
    std::vector<std::string> column_names,
    ResultOrder result_order,
    std::optional<TimestampRange> timestamp) {
    auto dataframe = std::make_unique<SOMAGeometryDataFrame>(
        mode, uri, ctx, column_names, result_order, timestamp);
    return dataframe;
}

bool SOMAGeometryDataFrame::exists(
std::string_view uri, std::shared_ptr<SOMAContext> ctx) {
try {
auto obj = SOMAObject::open(uri, OpenMode::read, ctx);
return "SOMAGeometryDataFrame" == obj->type();
} catch (TileDBSOMAError& e) {
return false;
}
}

//===================================================================
//= public non-static
//===================================================================

std::unique_ptr<ArrowSchema> SOMAGeometryDataFrame::schema() const {
return this->arrow_schema();
}

const std::vector<std::string> SOMAGeometryDataFrame::index_column_names()
const {
return this->dimension_names();
}

/**
 * Return the spatial column names.
 *
 * Every dimension name is searched for the pattern
 * `tiledb__internal__<name>__`; the captured `<name>` part is collected.
 * Each name is reported once, in the order it is first encountered among the
 * dimensions. Dimensions that do not match the pattern are ignored.
 *
 * @return std::vector<std::string> Unique spatial column names
 */
const std::vector<std::string> SOMAGeometryDataFrame::spatial_column_names()
    const {
    // Compiled once: constructing a std::regex is costly and the pattern
    // never changes. Concurrent const use of a std::regex is safe.
    static const std::regex internal_dimension_pattern(
        "tiledb__internal__(\\S+)__");

    std::vector<std::string> names;
    std::unordered_set<std::string> seen;
    // Bind by const reference so each dimension name is not copied.
    for (const auto& dimension : this->dimension_names()) {
        std::smatch match;
        if (!std::regex_search(dimension, match, internal_dimension_pattern)) {
            continue;
        }

        // Materialise the capture once; insert() reports whether it was new,
        // which avoids a separate lookup.
        std::string axis = match[1].str();
        if (seen.insert(axis).second) {
            names.push_back(std::move(axis));
        }
    }

    return names;
}

uint64_t SOMAGeometryDataFrame::count() {
return this->nnz();
}

} // namespace tiledbsoma
179 changes: 179 additions & 0 deletions libtiledbsoma/src/soma/soma_geometry_dataframe.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/**
* @file soma_geometry_dataframe.h
*
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @section DESCRIPTION
*
* This file defines the SOMAGeometryDataFrame class.
*/

#ifndef SOMA_GEOMETRY_DATAFRAME
#define SOMA_GEOMETRY_DATAFRAME

#include <filesystem>

#include "soma_array.h"

namespace tiledbsoma {

class ArrayBuffers;

using namespace tiledb;

/**
* A SOMAArray specialization whose SOMA type tag is "SOMAGeometryDataFrame".
* It adds accessors for the index columns and for the spatial columns on top
* of the SOMAArray base.
*/
class SOMAGeometryDataFrame : virtual public SOMAArray {
public:
//===================================================================
//= public static
//===================================================================

/**
* @brief Create a SOMAGeometryDataFrame object at the given URI.
*
* @param uri URI to create the SOMAGeometryDataFrame
* @param schema Arrow schema
* @param index_columns The index column names with associated domains
* and tile extents per dimension
* @param spatial_columns The spatial column names with associated domains
* and tile extents per dimension
* @param ctx SOMAContext
* @param platform_config Optional config parameter dictionary
* @param timestamp Optional the timestamp range to write SOMA metadata info
*/
static void create(
std::string_view uri,
std::unique_ptr<ArrowSchema> schema,
ArrowTable index_columns,
ArrowTable spatial_columns,
std::shared_ptr<SOMAContext> ctx,
PlatformConfig platform_config = PlatformConfig(),
std::optional<TimestampRange> timestamp = std::nullopt);

/**
* @brief Open and return a SOMAGeometryDataFrame object at the given URI.
*
* The arguments are forwarded to the SOMAGeometryDataFrame constructor.
*
* @param uri URI of the SOMAGeometryDataFrame to open
* @param mode read or write
* @param ctx SOMAContext
* @param column_names Columns to read. Defaults to an empty list
* (NOTE(review): presumably empty means "all columns" -- confirm against
* SOMAArray).
* @param result_order Read result order: automatic (default), rowmajor, or
* colmajor
* @param timestamp If specified, overrides the default timestamp used to
* open this object. If unset, uses the timestamp provided by the context.
* @return std::unique_ptr<SOMAGeometryDataFrame> SOMAGeometryDataFrame
*/
static std::unique_ptr<SOMAGeometryDataFrame> open(
std::string_view uri,
OpenMode mode,
std::shared_ptr<SOMAContext> ctx,
std::vector<std::string> column_names = {},
ResultOrder result_order = ResultOrder::automatic,
std::optional<TimestampRange> timestamp = std::nullopt);

/**
* @brief Check if the SOMAGeometryDataFrame exists at the URI.
*
* @param uri URI to check
* @param ctx SOMAContext
* @return true if the object at the URI can be opened and reports the type
* "SOMAGeometryDataFrame"; false if the type differs or opening raises a
* TileDBSOMAError
*/
static bool exists(std::string_view uri, std::shared_ptr<SOMAContext> ctx);

//===================================================================
//= public non-static
//===================================================================

/**
* @brief Construct a new SOMAGeometryDataFrame object.
*
* The array name passed to the SOMAArray base is the final path component
* of the URI, and the batch size is fixed to "auto".
*
* @param mode read or write
* @param uri URI of the array
* @param ctx SOMAContext
* @param column_names Columns to read
* @param result_order Read result order: automatic (default), rowmajor, or
* colmajor
* @param timestamp Timestamp
*/
SOMAGeometryDataFrame(
OpenMode mode,
std::string_view uri,
std::shared_ptr<SOMAContext> ctx,
std::vector<std::string> column_names,
ResultOrder result_order,
std::optional<TimestampRange> timestamp = std::nullopt)
: SOMAArray(
mode,
uri,
ctx,
std::filesystem::path(uri).filename().string(), // array name
column_names,
"auto", // batch_size
result_order,
timestamp) {
}

/**
* @brief Construct a SOMAGeometryDataFrame by copy-constructing its SOMAArray
* base from an existing SOMAArray.
*
* Note this constructor is not explicit, so a SOMAArray converts implicitly.
*
* @param other SOMAArray to copy from
*/
SOMAGeometryDataFrame(const SOMAArray& other)
: SOMAArray(other) {
}

// Default construction and move construction are disabled; copy
// construction and destruction use the compiler-generated versions.
SOMAGeometryDataFrame() = delete;
SOMAGeometryDataFrame(const SOMAGeometryDataFrame&) = default;
SOMAGeometryDataFrame(SOMAGeometryDataFrame&&) = delete;
~SOMAGeometryDataFrame() = default;

// Keep the SOMAArray::open overloads visible alongside the static open()
// declared above, which would otherwise hide them.
using SOMAArray::open;

/**
* Return the data schema, in the form of a ArrowSchema.
*
* @return std::unique_ptr<ArrowSchema>
*/
std::unique_ptr<ArrowSchema> schema() const;

/**
* Return the index (dimension) column names.
*
* @return std::vector<std::string>
*/
const std::vector<std::string> index_column_names() const;

/**
* Return the spatial column names.
*
* The names are extracted from dimension names of the form
* `tiledb__internal__<name>__`; each name is returned once, in the order it
* is first seen among the dimensions.
*
* @return std::vector<std::string>
*/
const std::vector<std::string> spatial_column_names() const;

/**
* Return the number of rows. The value is the one reported by nnz().
*
* @return uint64_t
*/
uint64_t count();
};
} // namespace tiledbsoma

#endif // SOMA_GEOMETRY_DATAFRAME
4 changes: 2 additions & 2 deletions libtiledbsoma/src/soma/soma_object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "soma_dataframe.h"
#include "soma_dense_ndarray.h"
#include "soma_experiment.h"
#include "soma_geometry_dataframe.h"
#include "soma_measurement.h"
#include "soma_multiscale_image.h"
#include "soma_point_cloud_dataframe.h"
Expand Down Expand Up @@ -61,8 +62,7 @@ std::unique_ptr<SOMAObject> SOMAObject::open(
} else if (array_type == "somapointclouddataframe") {
return std::make_unique<SOMAPointCloudDataFrame>(*array_);
} else if (array_type == "somageometrydataframe") {
throw TileDBSOMAError(
"Support for SOMAGeometryDataFrame is not yet implemented");
return std::make_unique<SOMAGeometryDataFrame>(*array_);
} else {
throw TileDBSOMAError("Saw invalid SOMAArray type");
}
Expand Down
1 change: 1 addition & 0 deletions libtiledbsoma/src/tiledbsoma/tiledbsoma
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include "soma/soma_experiment.h"
#include "soma/soma_measurement.h"
#include "soma/soma_scene.h"
#include "soma/soma_geometry_dataframe.h"
#include "soma/soma_point_cloud_dataframe.h"
#include "soma/soma_multiscale_image.h"
#include "soma/soma_object.h"
Expand Down
Loading

0 comments on commit 02bfa69

Please sign in to comment.