From 6a0130cb3e63124f6c4f60ecb3e7b565f8f75f12 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 22 Oct 2024 23:19:54 -0400 Subject: [PATCH] [c++] `upgrade_domain` --- libtiledbsoma/src/soma/soma_array.cc | 129 +++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 9a954d3d72..19d542c592 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -2065,6 +2065,135 @@ std::pair SOMAArray::_can_set_dataframe_domainish_subhelper( return std::pair(true, ""); } +void SOMAArray::upgrade_domain( + const ArrowTable& newdomain, std::string function_name_for_messages) { + if (has_current_domain()) { + throw TileDBSOMAError(fmt::format(fmt::format( + "{}: dataframe already has a domain", function_name_for_messages))); + } + + Domain domain = arr_->schema().domain(); + + // XXX + ArrowArray* new_domain_array = newdomain.first.get(); + ArrowSchema* new_domain_schema = newdomain.second.get(); + + if (new_domain_schema->n_children != domain.ndim()) { + throw TileDBSOMAError(fmt::format(fmt::format( + "{}: requested domain has ndim={} but the dataframe has " + "ndim={}", + function_name_for_messages, + new_domain_schema->n_children, + domain.ndim()))); + } + + if (new_domain_schema->n_children != new_domain_array->n_children) { + throw TileDBSOMAError(fmt::format(fmt::format( + "{}: internal coding error", function_name_for_messages))); + } + + auto tctx = ctx_->tiledb_ctx(); + NDRectangle ndrect(*tctx, domain); + CurrentDomain new_current_domain(*tctx); + + for (unsigned i = 0; i < domain.ndim(); i++) { + const Dimension& dim = domain.dimension(i); + const std::string dim_name = dim.name(); + + switch (dim.type()) { + case TILEDB_STRING_ASCII: + case TILEDB_STRING_UTF8: + case TILEDB_CHAR: + case TILEDB_GEOM_WKB: + case TILEDB_GEOM_WKT: + ndrect.set_range(dim_name, "", "\xff"); + break; + + case TILEDB_INT8: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + int8_t>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_BOOL: + case TILEDB_UINT8: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + uint8_t>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_INT16: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + int16_t>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_UINT16: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + uint16_t>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_INT32: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + int32_t>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_UINT32: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + uint32_t>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_INT64: + case TILEDB_DATETIME_YEAR: + case TILEDB_DATETIME_MONTH: + case TILEDB_DATETIME_WEEK: + case TILEDB_DATETIME_DAY: + case TILEDB_DATETIME_HR: + case TILEDB_DATETIME_MIN: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: + case TILEDB_DATETIME_PS: + case TILEDB_DATETIME_FS: + case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + int64_t>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_UINT64: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + uint64_t>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_FLOAT32: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + float>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + case TILEDB_FLOAT64: { + auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< + double>(newdomain, i); + ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); + } break; + default: + throw TileDBSOMAError(fmt::format( + "{}: internal error: unhandled type {} for {}.", + function_name_for_messages, + tiledb::impl::type_to_str(dim.type()), + dim_name)); + } + } + + new_current_domain.set_ndrectangle(ndrect); +} + std::vector SOMAArray::_tiledb_current_domain() { // Variant-indexed dataframes must use a separate path _check_dims_are_int64();