Skip to content

Commit

Permalink
merge (#2505)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl authored May 3, 2024
1 parent e4fd7f1 commit 82f1468
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 34 deletions.
28 changes: 20 additions & 8 deletions apis/r/R/SOMADenseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,18 @@ SOMADenseNDArray <- R6::R6Class(

result_order <- map_query_layout(match_query_layout(result_order))

if (!is.null(coords)) {
coords <- private$.convert_coords(coords)
if (is.null(coords)) {
# These are 0-up: add 1 for R use
ned <- self$non_empty_domain()
coords <- lapply(X=as.integer(ned), FUN=function(x){0:x})
}
coords <- private$.convert_coords(coords)

cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context()))
rl <- soma_array_reader(uri = uri,
dim_points = coords, # NULL dealt with by soma_array_reader()
dim_points = coords,
result_order = result_order,
loglevel = log_level, # idem
loglevel = log_level, # NULL dealt with by soma_array_reader()
config = cfg)

soma_array_to_arrow_table(rl)
Expand All @@ -74,15 +77,24 @@ SOMADenseNDArray <- R6::R6Class(

dims <- self$dimensions()
attr <- self$attributes()

stopifnot("Array must have two dimensions" = length(dims) == 2,
"Array must contain columns 'soma_dim_0' and 'soma_dim_1'" =
all.equal(c("soma_dim_0", "soma_dim_1"), names(dims)),
"Array must contain column 'soma_data'" = all.equal("soma_data", names(attr)))

if (is.null(coords)) {
ned <- self$non_empty_domain()
# These are 0-up: add 1 for R use
nrow <- as.numeric(ned[[1]]) + 1
ncol <- as.numeric(ned[[2]]) + 1
} else {
nrow <- length(unique(as.numeric(coords[[1]])))
ncol <- length(unique(as.numeric(coords[[2]])))
}

tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level)
m <- matrix(as.numeric(tbl$GetColumnByName("soma_data")),
nrow = length(unique(as.numeric(tbl$GetColumnByName("soma_dim_0")))),
ncol = length(unique(as.numeric(tbl$GetColumnByName("soma_dim_1")))),
nrow = nrow,
ncol = ncol,
byrow = result_order == "ROW_MAJOR")

},
Expand Down
34 changes: 32 additions & 2 deletions apis/r/R/SOMAExperimentAxisQuery.R
Original file line number Diff line number Diff line change
Expand Up @@ -411,12 +411,42 @@ SOMAExperimentAxisQuery <- R6::R6Class(

# Retrieve coo arrow table with query result
layer <- self$ms[[collection]]$get(layer_name)

if (inherits(layer, "SOMADenseNDArray")) {
tbl <- layer$read_arrow_table(coords = coords)
} else {
tbl <- layer$read(coords = coords)$tables()$concat()
# For dense arrays, coordinates are not materialized in storage.
# Here we re-supply them.
ldims <- layer$dimnames()
for (i in seq_along(ldims)) {
ldim <- ldims[i]
if (is.null(coords[[ldim]])) {
coords[[ldim]] <- seq_len(as.numeric(layer$non_empty_domain()[i] + 1L))
}
}
mat <- matrix(
tbl$soma_data$as_vector(),
nrow = length(coords$soma_dim_0),
ncol = length(coords$soma_dim_1),
byrow = TRUE
)
# Add dimnames
dim_names <- switch(
EXPR = collection,
X = list(obs_labels, var_labels),
obsm = list(obs_labels, coords$soma_dim_1), # or list(obs_labels, seq_len(ncol(mat)))
varm = list(var_labels, coords$soma_dim_1), # or list(var_labels, seq_len(ncol(mat)))
obsp = list(obs_labels, obs_labels),
varp = list(var_labels, var_labels)
)
dim_names <- Map("%||%", dim_names, coords)
dimnames(mat) <- dim_names
return(mat)
}

# For sparse arrays, coordinates are materialized in storage.
# They come back in `tbl` as COO (a.k.a. IJV) triples.

tbl <- layer$read(coords = coords)$tables()$concat()

# Reindex the coordinates
# Constructing a matrix with the joinids produces a matrix with
Expand Down
2 changes: 1 addition & 1 deletion apis/r/tests/testthat/test-SOMADenseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ test_that("SOMADenseNDArray creation", {
ndarray <- SOMADenseNDArrayOpen(uri)
tbl <- ndarray$read_arrow_table(result_order = "COL_MAJOR")
expect_true(is_arrow_table(tbl))
expect_equal(tbl$ColumnNames(), c("soma_dim_0", "soma_dim_1", "soma_data"))
expect_equal(tbl$ColumnNames(), c("soma_data"))

expect_identical(
as.numeric(tbl$GetColumnByName("soma_data")),
Expand Down
23 changes: 16 additions & 7 deletions apis/r/tools/r-ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -372,15 +372,24 @@ DumpLogsByExtension() {
fi
extension=$1
shift
package=$(find . -maxdepth 1 -name "*.Rcheck" -type d)
if [[ ${#package[@]} -ne 1 ]]; then
echo "Could not find package Rcheck directory, skipping log dump."

# Note: this script may be run from the apis/r subdirectory or from the
# package base directory. This runs in CI from a clean image so in the
# (hypothetical) case of multiple log files, we err on the side of providing
# the operator with more information rather than less.
found="false"
for package in $(find . -name "*.Rcheck" -type d); do
for name in $(find "${package}" -type f -name "*${extension}"); do
found="true"
echo "::group::Package ${package} filename ${name}" # GHA magic
cat ${name}
echo "::endgroup::"
done
done
if [[ "$found" = "false" ]]; then
echo "Could not find any package Rcheck directories; skipping log dump."
exit 0
fi
for name in $(find "${package}" -type f -name "*${extension}"); do
echo ">>> Filename: ${name} <<<"
cat ${name}
done
}

DumpLogs() {
Expand Down
24 changes: 13 additions & 11 deletions apis/system/tests/test_densendarray_write_python_read_r.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class TestDenseNDArrayWritePythonReadR(TestWritePythonReadR):
"""

@pytest.fixture(scope="class")
def dense_nd_array(self):
def np_dense_nd_array(self):
arr = soma.DenseNDArray.create(self.uri, type=pa.int32(), shape=(2, 3, 4))
ndarray = np.random.default_rng().integers(0, 10, 24).reshape(2, 3, 4)
data = pa.Tensor.from_numpy(ndarray)
Expand All @@ -24,30 +24,32 @@ def base_R_script(self):
return f"""
library("tiledbsoma")
soma_ndarray <- SOMADenseNDArrayOpen("{self.uri}")
table = soma_ndarray$read_arrow_table()
df = as.data.frame(table)
table <- soma_ndarray$read_arrow_table()
df <- as.data.frame(table)
"""

def test_ndarray_shape_matches(self, dense_nd_array):
def test_ndarray_shape_matches(self, np_dense_nd_array):
"""
The source ndarray is a 2x3x4 tensor, so the resulting soma_ndarray should have 3 dimensions.
"""
self.r_assert("stopifnot(length(soma_ndarray$dimensions()) == 3)")

def test_ndarray_type_matches(self, dense_nd_array):
def test_ndarray_type_matches(self, np_dense_nd_array):
"""
The DenseNDArray should have a type of int32.
"""
self.r_assert('stopifnot(table$soma_data$type$ToString() == "int32")')

def test_ndarray_content_matches(self, dense_nd_array):
def test_ndarray_content_matches(self, np_dense_nd_array):
"""
The DenseNDArray content should match. To test this, we convert the DenseNDArray into an arrow.Table and
to a data.frame, and we use the coordinates to match the values.
The DenseNDArray content should match. Sparse reads materialize the coordinate columns
(soma_dim_0, soma_dim_1, soma_dim_2) alongside soma_data; dense reads do not, so we get only soma_data.
A quick way to check for matches is to flatten out both sides and compare indexwise.
"""
flattened_python_read = np_dense_nd_array.flatten().tolist()
multi_assert = ""
for x, y, z in np.ndindex((2, 3, 4)):
val = dense_nd_array[x, y, z]
multi_assert += f"stopifnot(df[which(df$soma_dim_0 == {x} & df$soma_dim_1 == {y} & df$soma_dim_2 == {z}),]$soma_data == {val})"
for i in range(2 * 3 * 4):
val = flattened_python_read[i]
multi_assert += f"stopifnot(df$soma_data[[{i}+1]] == {val})"
multi_assert += "\n"
self.r_assert(multi_assert)
6 changes: 4 additions & 2 deletions libtiledbsoma/src/soma/managed_query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,10 @@ void ManagedQuery::setup_read() {
// If no columns were selected, select all columns.
// Add dims and attrs in the same order as specified in the schema
if (columns_.empty()) {
for (const auto& dim : array_->schema().domain().dimensions()) {
columns_.push_back(dim.name());
if (array_->schema().array_type() == TILEDB_SPARSE) {
for (const auto& dim : array_->schema().domain().dimensions()) {
columns_.push_back(dim.name());
}
}
int attribute_num = array_->schema().attribute_num();
for (int i = 0; i < attribute_num; i++) {
Expand Down
6 changes: 3 additions & 3 deletions libtiledbsoma/test/unit_soma_dense_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ TEST_CASE("SOMADenseNDArray: basic") {
REQUIRE(soma_dense->type() == "SOMADenseNDArray");
while (auto batch = soma_dense->read_next()) {
auto arrbuf = batch.value();
auto d0span = arrbuf->at("d0")->data<int64_t>();
// Dense coordinates are not materialized on read so we can't check
// a d0span of type int64_t
auto a0span = arrbuf->at("a0")->data<int>();
REQUIRE(d0 == std::vector<int64_t>(d0span.begin(), d0span.end()));
REQUIRE(a0 == std::vector<int>(a0span.begin(), a0span.end()));
}
soma_dense->close();
Expand Down Expand Up @@ -168,4 +168,4 @@ TEST_CASE("SOMADenseNDArray: metadata") {
soma_dense->open(OpenMode::read, TimestampRange(0, 2));
REQUIRE(!soma_dense->has_metadata("md"));
REQUIRE(soma_dense->metadata_num() == 2);
}
}

0 comments on commit 82f1468

Please sign in to comment.