Skip to content

Commit

Permalink
name-neatening, and libtiledbsoma logging aids
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 2, 2024
1 parent 05aff2e commit 524b761
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 11 deletions.
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1563,7 +1563,7 @@ def _update_dataframe(
else:
add_attrs[add_key] = get_arrow_str_format(atype)

clib._update_dataframe(
clib._update_dataframe_schema(
sdf.uri, sdf.context.native_context, list(drop_keys), add_attrs, add_enmrs
)

Expand Down
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,13 @@ PYBIND11_MODULE(pytiledbsoma, m) {
"consolidate_and_vacuum", &PlatformConfig::consolidate_and_vacuum);

m.def(
"_update_dataframe",
"_update_dataframe_schema",
[](std::string uri,
std::shared_ptr<SOMAContext> ctx,
std::vector<std::string> drop_attrs,
std::map<std::string, std::string> add_attrs,
std::map<std::string, std::pair<std::string, bool>> add_enmrs) {
SOMADataFrame::update_dataframe(
SOMADataFrame::update_dataframe_schema(
uri, ctx, drop_attrs, add_attrs, add_enmrs);
});

Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1744,7 +1744,7 @@ def test_fix_update_dataframe_with_var_strings(tmp_path):
updated_sdf["newattr"] = np.array(["a", "b", "c", "d"])

with soma.DataFrame.open(uri, "w") as sdf:
soma.io.ingest._update_dataframe(
soma.io.ingest._update_dataframe_schema(
sdf,
updated_sdf,
"testing",
Expand Down
19 changes: 16 additions & 3 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,12 +302,17 @@ SOMADataFrame <- R6::R6Class(
# To push down to C++:
#
# void SOMADataFrame::update_dataframe(
# std::string uri,
# std::shared_ptr<SOMAContext> ctx,
# std::vector<std::string> drop_attrs,
# k std::string uri,
# k std::shared_ptr<SOMAContext> ctx,
# k std::vector<std::string> drop_attrs,
# std::map<std::string, std::string> add_attrs,
# std::map<std::string, std::pair<std::string, bool>> add_enmrs);

cat("\n\n")
cat("DROP COLS\n")
print(drop_cols)
cat("\n\n")

# Drop columns
se <- tiledb::tiledb_array_schema_evolution()
for (drop_col in drop_cols) {
Expand All @@ -328,6 +333,14 @@ SOMADataFrame <- R6::R6Class(
tiledb_create_options = tiledb_create_options
)

cat("\n\n")
cat("ADD COL NAME\n")
cat(add_col)
cat("ADD COL TYPE\n")
print(col_type)
cat("\n\n")
browser()

if (inherits(col_type, "DictionaryType")) {
spdl::debug(
"[SOMADataFrame update]: adding column '{}' as an enumerated type",
Expand Down
39 changes: 37 additions & 2 deletions libtiledbsoma/src/soma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
*/

#include "soma_dataframe.h"
#include "../utils/logger.h"

namespace tiledbsoma {
using namespace tiledb;
Expand Down Expand Up @@ -86,14 +87,17 @@ bool SOMADataFrame::exists(
}
}

void SOMADataFrame::update_dataframe(
void SOMADataFrame::update_dataframe_schema(
std::string uri,
std::shared_ptr<SOMAContext> ctx,
std::vector<std::string> drop_attrs,
std::map<std::string, std::string> add_attrs,
std::map<std::string, std::pair<std::string, bool>> add_enmrs) {
ArraySchemaEvolution se(*ctx->tiledb_ctx());
for (auto key_name : drop_attrs) {
LOG_DEBUG(fmt::format(
"[SOMADataFrame::update_dataframe_schema] drop col name {}",
key_name));
se.drop_attribute(key_name);
}
for (auto add_attr : add_attrs) {
Expand Down Expand Up @@ -136,18 +140,49 @@ void SOMADataFrame::update_dataframe(
// we must accommodate reasonable/predictable needs.
attr.set_nullable(true);

// Non-enum columns:
//
// * add_attrs: attr_name -> Arrow type string like "i" or "U"
// * add_enmrs: no key present
//
// Enum columns:
//
// * add_attrs: attr_name -> attr_type: Arrow type string for the index
// type, e.g. 'c' for int8
// * add_enmrs: attr_name -> pair of:
// o enmr_type: Arrow type string for the value type, e.g. "f" or "U"
// o ordered: bool

auto enmr_it = add_enmrs.find(attr_name);
bool has_enmr = enmr_it != add_enmrs.end();

if (has_enmr) {
auto [enmr_type, ordered] = enmr_it->second;
LOG_DEBUG(fmt::format(
"[SOMADataFrame::update_dataframe_schema] add col name {} "
"index_type "
"{} value_type {} ordered {}",
attr_name,
attr_type,
enmr_type,
ordered));
se.add_enumeration(Enumeration::create_empty(
*ctx->tiledb_ctx(),
attr_name,
ArrowAdapter::to_tiledb_format(enmr_type),
enmr_type == "U" || enmr_type == "Z" ? TILEDB_VAR_NUM : 1,
enmr_type == "u" || enmr_type == "z" || enmr_type == "U" ||
enmr_type == "Z" ?
TILEDB_VAR_NUM :
1,
ordered));
AttributeExperimental::set_enumeration_name(
*ctx->tiledb_ctx(), attr, attr_name);
} else {
LOG_DEBUG(fmt::format(
"[SOMADataFrame::update_dataframe_schema] add col name {} type "
"{}",
attr_name,
attr_type));
}

se.add_attribute(attr);
Expand Down
19 changes: 17 additions & 2 deletions libtiledbsoma/src/soma/soma_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,24 @@ class SOMADataFrame : public SOMAArray {
static bool exists(std::string_view uri, std::shared_ptr<SOMAContext> ctx);

/**
* This is for schema evolution
* This is for schema evolution.
*
* For non-enum attrs:
*
* o drop_attrs: attr_name
* o add_attrs: attr_name -> Arrow type string like "i" or "U"
* o add_enmrs: no key present
*
* For enum attrs:
*
* o drop_attrs: attr_name
* o add_attrs: attr_name -> Arrow type string for the index
* type, e.g. 'c' for int8
* o add_enmrs: attr_name -> pair of:
* - Arrow type string for the value type, e.g. "f" or "U"
* - bool ordered
*/
static void update_dataframe(
static void update_dataframe_schema(
std::string uri,
std::shared_ptr<SOMAContext> ctx,
std::vector<std::string> drop_attrs,
Expand Down

0 comments on commit 524b761

Please sign in to comment.