Skip to content

Commit

Permalink
[r] Backport #3145 to release-1.14; 1.14.3 (#3147)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl authored Oct 8, 2024
1 parent 0e63ade commit 85339bd
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 26 deletions.
2 changes: 1 addition & 1 deletion apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices,
like those commonly used for single cell data analysis. It is documented at
<https://github.com/single-cell-data>; a formal specification available is at
<https://github.com/single-cell-data/SOMA/blob/main/abstract_specification.md>.
Version: 1.14.2
Version: 1.14.3
Authors@R: c(
person(given = "Aaron", family = "Wolen",
role = c("cre", "aut"), email = "aaron@tiledb.com",
Expand Down
6 changes: 6 additions & 0 deletions apis/r/NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# tiledbsoma 1.14.3

## Changes

* Handle `numeric` coords properly when reading arrays [3145](https://github.com/single-cell-data/TileDB-SOMA/pull/3145)

# tiledbsoma 1.14.2

## Changes
Expand Down
61 changes: 45 additions & 16 deletions apis/r/R/SOMANDArrayBase.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,22 +111,51 @@ SOMANDArrayBase <- R6::R6Class(
# format acceptable for sr_setup and soma_array_reader
.convert_coords = function(coords) {

## ensure coords is a named list, use to select dim points
stopifnot("'coords' must be a list" = is.list(coords),
"'coords' must be a list of vectors or integer64" =
all(vapply_lgl(coords, is_vector_or_int64)),
"'coords' if unnamed must have length of dim names, else if named names must match dim names" =
(is.null(names(coords)) && length(coords) == length(self$dimnames())) ||
(!is.null(names(coords)) && all(names(coords) %in% self$dimnames()))
)

## if unnamed (and test for length has passed in previous statement) set names
if (is.null(names(coords))) names(coords) <- self$dimnames()

## convert integer to integer64 to match dimension type
coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x)

coords
# Ensure coords is a named list, use to select dim points
stopifnot(
"'coords' must be a list" = is.list(coords) && length(coords),
"'coords' must be a list integerish vectors" =
all(vapply(
X = coords,
FUN = function(x) {
if (is.null(x)) {
return(TRUE)
}
return(
(is.null(dim(x)) && !is.factor(x)) &&
(rlang::is_integerish(x, finite = TRUE) || (bit64::is.integer64(x) && all(is.finite(x)))) &&
length(x) &&
all(x >= 0L)
)
},
FUN.VALUE = logical(length = 1L),
USE.NAMES = FALSE
)),
"'coords' if unnamed must have length of dim names, else if named names must match dim names" = ifelse(
test = is.null(names(coords)),
yes = length(coords) == length(self$dimnames()),
no = all(names(coords) %in% self$dimnames())
)
)

# Remove NULL-entries from coords
coords <- Filter(Negate(is.null), coords)
if (!length(coords)) {
return(NULL)
}

# If unnamed, set names
if (is.null(names(coords))) {
names(coords) <- self$dimnames()
}

# Convert to integer64 to match dimension type
return(sapply(
coords,
FUN = bit64::as.integer64,
simplify = FALSE,
USE.NAMES = TRUE
))
},

# @description Converts a vector of ints into a vector of int64 in a format
Expand Down
17 changes: 10 additions & 7 deletions apis/r/R/SOMASparseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,16 @@ SOMASparseNDArray <- R6::R6Class(
coords <- private$.convert_coords(coords)
}

sr <- sr_setup(uri = self$uri,
private$.soma_context,
dim_points = coords,
result_order = result_order,
timestamprange = self$.tiledb_timestamp_range,
loglevel = log_level)
SOMASparseNDArrayRead$new(sr, self, coords)
sr <- sr_setup(
uri = self$uri,
private$.soma_context,
dim_points = coords,
result_order = result_order,
timestamprange = self$.tiledb_timestamp_range,
loglevel = log_level
)

return(SOMASparseNDArrayRead$new(sr, self, coords))
},

#' @description Write matrix-like data to the array. (lifecycle: maturing)
Expand Down
4 changes: 2 additions & 2 deletions apis/r/tests/testthat/test-SOMADenseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ test_that("SOMADenseNDArray creation", {

# Subset the array on both dimensions
tbl <- ndarray$read_arrow_table(
coords = list(soma_dim_0=0:3, soma_dim_1=0:2),
coords = list(soma_dim_0 = 0:3, soma_dim_1 = 0:2),
result_order = "COL_MAJOR"
)
expect_identical(
Expand Down Expand Up @@ -69,7 +69,7 @@ test_that("SOMADenseNDArray creation", {
# Validating coords format
expect_error(
ndarray$read_arrow_table(coords = list(cbind(0, 1))),
"must be a list of vectors"
regexp = "'coords' must be a list integerish vectors"
)

# Validate TileDB array schema
Expand Down
110 changes: 110 additions & 0 deletions apis/r/tests/testthat/test-SOMASparseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,116 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", {
ndarray$close()
})

test_that("SOMASparseNDArray read coordinates", {
skip_if(!extended_tests())
uri <- tempfile(pattern = "sparse-ndarray")
nrows <- 100L
ncols <- 20L

ndarray <- create_and_populate_sparse_nd_array(
uri = uri,
mode = "READ",
nrows = nrows,
ncols = ncols,
seed = 42L
)
on.exit(ndarray$close(), add = TRUE, after = FALSE)

expect_identical(as.integer(ndarray$shape()), c(nrows, ncols))
expect_s4_class(mat <- ndarray$read()$sparse_matrix()$concat(), "dgTMatrix")
expect_identical(dim(mat), c(nrows, ncols))

# Note: slices `:` yield integers, not numerics
# Note: #L is integer, # on its own is numeric
cases <- list(
# Test one dim NULL
"dim0 null, dim1 slice" = list(soma_dim_0 = NULL, soma_dim_1 = 0:9),
"dim0 null, dim1 slice" = list(soma_dim_0 = 35:45, soma_dim_1 = NULL),
"dim0 null, dim1 coords" = list(
soma_dim_0 = NULL,
soma_dim_1 = c(0L, 5L, 10L)
),
"dim0 coords, dim1 null" = list(soma_dim_0 = c(72, 83), soma_dim_1 = NULL),
# Test both dims null
"dim0 null, dim1 null" = list(soma_dim_0 = NULL, soma_dim_1 = NULL),
# Test both dims provided
"dim0 coords, dim1 coords" = list(
soma_dim_0 = c(72, 83),
soma_dim_1 = c(0L, 5L, 10L)
),
"dim0 slice, dim1 slice" = list(soma_dim_0 = 35:45, soma_dim_1 = 0:9),
"dim0 coords, dim1 slice" = list(soma_dim_0 = c(72, 83), soma_dim_1 = 0:9),
"dim0 slice, dim0 coords" = list(
soma_dim_0 = 35:45,
soma_dim_1 = c(0L, 5L, 10L)
),
# Test one dim missing
"dim0 missing, dim1 slice" = list(soma_dim_1 = 0:9),
"dim0 missing, dim1 coords" = list(soma_dim_1 = c(0L, 5L, 10L)),
"dim0 missing, dim1 null" = list(soma_dim_1 = NULL),
"dim0 slice, dim1 missing" = list(soma_dim_0 = 35:45),
"dim0 coords, dim1 missing" = list(soma_dim_0 = c(72, 83)),
"dim0 coords, dim1 null" = list(soma_dim_0 = NULL),
# Test zero-pull
"zero-pull" = list(soma_dim_0 = c(0, 3), soma_dim_1 = c(0L, 9L))
)
for (i in seq_along(cases)) {
coords <- cases[[i]]
label <- names(cases)[i]
expect_s3_class(tbl <- ndarray$read(coords)$tables()$concat(), "Table")
ii <- if (is.null(coords$soma_dim_0)) {
TRUE
} else {
mat@i %in% coords$soma_dim_0
}
jj <- if (is.null(coords$soma_dim_1)) {
TRUE
} else {
mat@j %in% coords$soma_dim_1
}
nr <- ifelse(isTRUE(ii) && isTRUE(jj), yes = length(mat@x), no = sum(ii & jj))
expect_identical(nrow(tbl), nr, label = label)
}

# Test assertions
list_cases <- list(TRUE, "tomato", 1L, 1.1, bit64::as.integer64(1L), list())
for (coords in list_cases) {
expect_error(ndarray$read(coords), regexp = "'coords' must be a list")
}

intgerish_cases <- list(
list(TRUE),
list("tomato"),
list(1.1),
list(NA_integer_),
list(NA_real_),
list(bit64::NA_integer64_),
list(Inf),
list(-4),
list(factor(letters[1:10])),
list(matrix(1:10, ncol = 1:10)),
list(array(1:10))
)
for (coords in intgerish_cases) {
expect_error(
ndarray$read(coords),
regexp = "'coords' must be a list integerish vectors"
)
}

names_cases <- list(
list(1:3, 1:5, 1:10),
list(tomato = 1:10),
list(soma_dim_0 = 1:10, tomato = 1:10)
)
for (coords in names_cases) {
expect_error(
ndarray$read(coords),
regexp = "'coords' if unnamed must have length of dim names, else if named names must match dim names"
)
}
})

test_that("SOMASparseNDArray creation with duplicates", {
skip_if(!extended_tests())
uri <- tempfile(pattern="sparse-ndarray")
Expand Down

0 comments on commit 85339bd

Please sign in to comment.