From 78fe40e5807405409656001928db4340efa43878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 06:36:50 +0100 Subject: [PATCH 01/21] Remove dead code --- R/dbQuoteIdentifier__duckdb_connection.R | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/R/dbQuoteIdentifier__duckdb_connection.R b/R/dbQuoteIdentifier__duckdb_connection.R index 27f1a29e5..4470bd282 100644 --- a/R/dbQuoteIdentifier__duckdb_connection.R +++ b/R/dbQuoteIdentifier__duckdb_connection.R @@ -11,7 +11,7 @@ dbQuoteIdentifier__duckdb_connection <- function(conn, x, ...) { } x <- enc2utf8(x) - needs_escape <- !grepl("^[a-zA-Z_][a-zA-Z0-9_]*$", x) | tolower(x) %in% reserved_words(conn) + needs_escape <- !grepl("^[a-zA-Z_][a-zA-Z0-9_]*$", x) | tolower(x) %in% conn@reserved_words x[needs_escape] <- paste0('"', gsub('"', '""', x[needs_escape]), '"') SQL(x, names = names(x)) @@ -30,17 +30,6 @@ setMethod("dbQuoteIdentifier", signature("duckdb_connection", "SQL"), dbQuoteIde #' @usage NULL setMethod("dbQuoteIdentifier", signature("duckdb_connection", "Id"), dbQuoteIdentifier__duckdb_connection) -reserved_words <- function(con) { - if (!isS4(con)) { - con <- dbConnect(duckdb()) - words <- get_reserved_words(con) - dbDisconnect(con, shutdown = TRUE) - words - } - - con@reserved_words -} - get_reserved_words <- function(con) { dbGetQuery(con, "SELECT keyword_name FROM duckdb_keywords()")[[1]] } From 609fbee0c829df739debadab3da40e8e398a7631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 23 Mar 2024 15:13:41 +0100 Subject: [PATCH 02/21] Use IN_MEMORY_PATH --- src/database.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/database.cpp b/src/database.cpp index 49a7deca5..3e6693266 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -24,7 +24,7 @@ static bool CastRstringToVarchar(Vector &source, Vector &result, idx_t count, Ca [[cpp11::register]] duckdb::db_eptr_t rapi_startup(std::string dbdir, bool readonly, cpp11::list configsexp) { const char *dbdirchar; - if (dbdir.length() == 0 || dbdir.compare(":memory:") == 0) { + if (dbdir.length() == 0 || dbdir.compare(IN_MEMORY_PATH) == 0) { dbdirchar = NULL; } else { dbdirchar = dbdir.c_str(); From 4699629245ac756a49b09552965e21886dc944af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 06:32:24 +0100 Subject: [PATCH 03/21] Flip dbGetInfo() logic --- R/dbGetInfo__duckdb_connection.R | 7 ++++--- R/dbGetInfo__duckdb_driver.R | 10 ++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/R/dbGetInfo__duckdb_connection.R b/R/dbGetInfo__duckdb_connection.R index 009c34471..17c350f1b 100644 --- a/R/dbGetInfo__duckdb_connection.R +++ b/R/dbGetInfo__duckdb_connection.R @@ -2,10 +2,11 @@ #' @inheritParams DBI::dbGetInfo #' @usage NULL dbGetInfo__duckdb_connection <- function(dbObj, ...) { - info <- dbGetInfo(dbObj@driver) + version <- dbGetQuery(dbObj, "select library_version from pragma_version()")[[1]][[1]] + list( - dbname = info$dbname, - db.version = info$driver.version, + dbname = dbObj@driver@dbdir, + db.version = version, username = NA, host = NA, port = NA diff --git a/R/dbGetInfo__duckdb_driver.R b/R/dbGetInfo__duckdb_driver.R index 86e7af038..e3f1a172e 100644 --- a/R/dbGetInfo__duckdb_driver.R +++ b/R/dbGetInfo__duckdb_driver.R @@ -2,10 +2,12 @@ #' @inheritParams DBI::dbGetInfo #' @usage NULL dbGetInfo__duckdb_driver <- function(dbObj, ...) { - con <- dbConnect(dbObj) - version <- dbGetQuery(con, "select library_version from pragma_version()")[[1]][[1]] - dbDisconnect(con) - list(driver.version = version, client.version = version, dbname = dbObj@dbdir) + info <- dbGetInfo__duckdb_connection(default_connection()) + list( + driver.version = info$db.version, + client.version = info$db.version, + dbname = dbObj@dbdir + ) } #' @rdname duckdb_driver-class From a202cb83d6ac5ff22d1a23bc7416689317f37902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 23 Mar 2024 14:36:30 +0100 Subject: [PATCH 04/21] Avoid showing database_ref for driver information --- R/Driver.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Driver.R b/R/Driver.R index 93837e929..9c977cebf 100644 --- a/R/Driver.R +++ b/R/Driver.R @@ -15,7 +15,7 @@ drv_to_string <- function(drv) { if (!is(drv, "duckdb_driver")) { stop("pass a duckdb_driver object") } - sprintf("", extptr_str(drv@database_ref), drv@dbdir, drv@read_only, drv@bigint) + sprintf("", drv@dbdir, drv@read_only, drv@bigint) } #' @description From d84acb20532aed640723e1bab8adb391d05185ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 05:28:00 +0100 Subject: [PATCH 05/21] Efficient lookup --- src/register.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/register.cpp b/src/register.cpp index a36f3afd8..bb1003fd8 100644 --- a/src/register.cpp +++ b/src/register.cpp @@ -205,18 +205,17 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const auto &data = (ArrowScanReplacementData &)*data_p; auto db_wrapper = data.wrapper; lock_guard arrow_scans_lock(db_wrapper->lock); - for (auto &e : db_wrapper->arrow_scans) { - if (e.first == table_name) { - auto table_function = make_uniq(); - vector> children; - children.push_back(make_uniq(Value::POINTER((uintptr_t)R_ExternalPtrAddr(e.second)))); - children.push_back( - make_uniq(Value::POINTER((uintptr_t)RArrowTabularStreamFactory::Produce))); - children.push_back( - make_uniq(Value::POINTER((uintptr_t)RArrowTabularStreamFactory::GetSchema))); - table_function->function = make_uniq("arrow_scan", std::move(children)); - return std::move(table_function); - } + const auto &arrow_scans = db_wrapper->arrow_scans; + for (auto e = arrow_scans.find(table_name); e != arrow_scans.end(); ++e) { + auto table_function = make_uniq(); + vector> children; + children.push_back(make_uniq(Value::POINTER((uintptr_t)R_ExternalPtrAddr(e->second)))); + children.push_back( + make_uniq(Value::POINTER((uintptr_t)RArrowTabularStreamFactory::Produce))); + children.push_back( + make_uniq(Value::POINTER((uintptr_t)RArrowTabularStreamFactory::GetSchema))); + table_function->function = make_uniq("arrow_scan", std::move(children)); + return std::move(table_function); } return nullptr; } From 400cb6b64f501897e3f1d5a3bed0e4074697c453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 05:33:30 +0100 Subject: [PATCH 06/21] rapi_list_arrow() --- R/cpp11.R | 4 ++++ R/register.R | 2 +- src/cpp11.cpp | 8 ++++++++ src/register.cpp | 14 ++++++++++++++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/R/cpp11.R b/R/cpp11.R index a70d0f32c..23a7237ca 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -32,6 +32,10 @@ rapi_unregister_arrow <- function(conn, name) { invisible(.Call(`_duckdb_rapi_unregister_arrow`, conn, name)) } +rapi_list_arrow <- function(conn) { + .Call(`_duckdb_rapi_list_arrow`, conn) +} + rapi_expr_reference <- function(rnames) { .Call(`_duckdb_rapi_expr_reference`, rnames) } diff --git a/R/register.R b/R/register.R index f11c11227..58c393f2d 100644 --- a/R/register.R +++ b/R/register.R @@ -113,5 +113,5 @@ duckdb_unregister_arrow <- function(conn, name) { #' @rdname duckdb_register_arrow #' @export duckdb_list_arrow <- function(conn) { - sort(gsub("_registered_arrow_", "", names(attributes(conn@driver@database_ref)), fixed = TRUE)) + sort(rapi_list_arrow(conn@conn_ref)) } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 03c07a280..fefd5aada 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -67,6 +67,13 @@ extern "C" SEXP _duckdb_rapi_unregister_arrow(SEXP conn, SEXP name) { return R_NilValue; END_CPP11 } +// register.cpp +cpp11::strings rapi_list_arrow(duckdb::conn_eptr_t conn); +extern "C" SEXP _duckdb_rapi_list_arrow(SEXP conn) { + BEGIN_CPP11 + return cpp11::as_sexp(rapi_list_arrow(cpp11::as_cpp>(conn))); + END_CPP11 +} // relational.cpp SEXP rapi_expr_reference(r_vector rnames); extern "C" SEXP _duckdb_rapi_expr_reference(SEXP rnames) { @@ -396,6 +403,7 @@ static const R_CallMethodDef CallEntries[] = { {"_duckdb_rapi_get_null_SEXP_ptr", (DL_FUNC) &_duckdb_rapi_get_null_SEXP_ptr, 0}, {"_duckdb_rapi_get_substrait", (DL_FUNC) &_duckdb_rapi_get_substrait, 3}, {"_duckdb_rapi_get_substrait_json", (DL_FUNC) &_duckdb_rapi_get_substrait_json, 3}, + {"_duckdb_rapi_list_arrow", (DL_FUNC) &_duckdb_rapi_list_arrow, 1}, {"_duckdb_rapi_prepare", (DL_FUNC) &_duckdb_rapi_prepare, 2}, {"_duckdb_rapi_prepare_substrait", (DL_FUNC) &_duckdb_rapi_prepare_substrait, 2}, {"_duckdb_rapi_prepare_substrait_json", (DL_FUNC) &_duckdb_rapi_prepare_substrait_json, 2}, diff --git a/src/register.cpp b/src/register.cpp index bb1003fd8..52e803f66 100644 --- a/src/register.cpp +++ b/src/register.cpp @@ -255,3 +255,17 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const } static_cast(conn->db_eptr).attr("_registered_arrow_" + name) = R_NilValue; } + +[[cpp11::register]] cpp11::strings rapi_list_arrow(duckdb::conn_eptr_t conn) { + lock_guard arrow_scans_lock(conn->db_eptr->lock); + const auto &arrow_scans = conn->db_eptr->arrow_scans; + + cpp11::writable::strings names; + names.reserve(arrow_scans.size()); + + for (const auto &e : arrow_scans) { + names.push_back(e.first); + } + + return names; +} From 1b1d96350552a1ecb7f4372779bba1099b884713 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 05:39:13 +0100 Subject: [PATCH 07/21] Check duplicate names in rapi_register_arrow() --- src/register.cpp | 6 +++++- tests/testthat/test-register_arrow.R | 13 +++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/register.cpp b/src/register.cpp index 52e803f66..7cbad2d53 100644 --- a/src/register.cpp +++ b/src/register.cpp @@ -235,9 +235,13 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const cpp11::external_pointer factorysexp(stream_factory); { - // TODO check if this name already exists? lock_guard arrow_scans_lock(conn->db_eptr->lock); auto &arrow_scans = conn->db_eptr->arrow_scans; + + for (auto e = arrow_scans.find(name); e != arrow_scans.end(); ++e) { + cpp11::stop("rapi_register_arrow: Arrow table '%s' already registered", name.c_str()); + } + arrow_scans[name] = factorysexp; } cpp11::writable::list state_list = {export_funs, valuesexp, factorysexp}; diff --git a/tests/testthat/test-register_arrow.R b/tests/testthat/test-register_arrow.R index 8b104597e..f9c29ee2c 100644 --- a/tests/testthat/test-register_arrow.R +++ b/tests/testthat/test-register_arrow.R @@ -448,6 +448,19 @@ test_that("we can list registered arrow tables", { }) +test_that("registered tables must be unique", { + con <- DBI::dbConnect(duckdb()) + on.exit(dbDisconnect(con, shutdown = TRUE)) + ds1 <- arrow::InMemoryDataset$create(mtcars) + ds2 <- arrow::InMemoryDataset$create(mtcars) + + expect_equal(length(duckdb_list_arrow(con)), 0) + + duckdb_register_arrow(con, "t1", ds1) + expect_error(duckdb_register_arrow(con, "t1", ds2), "'t1' already registered") +}) + + test_that("duckdb can read arrow timestamps", { con <- DBI::dbConnect(duckdb(), timezone_out = "UTC") on.exit(dbDisconnect(con, shutdown = TRUE)) From 150b2a1c01457dadc6c2bd45d3adf8dd79398e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 05:44:06 +0100 Subject: [PATCH 08/21] Store state_list in arrow_scans --- src/include/rapi.hpp | 2 +- src/register.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/include/rapi.hpp b/src/include/rapi.hpp index 80b227c2b..9f6a3713b 100644 --- a/src/include/rapi.hpp +++ b/src/include/rapi.hpp @@ -18,7 +18,7 @@ namespace duckdb { -typedef unordered_map arrow_scans_t; +typedef unordered_map arrow_scans_t; struct DBWrapper { duckdb::unique_ptr db; diff --git a/src/register.cpp b/src/register.cpp index 7cbad2d53..68b83b3d1 100644 --- a/src/register.cpp +++ b/src/register.cpp @@ -209,7 +209,7 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const for (auto e = arrow_scans.find(table_name); e != arrow_scans.end(); ++e) { auto table_function = make_uniq(); vector> children; - children.push_back(make_uniq(Value::POINTER((uintptr_t)R_ExternalPtrAddr(e->second)))); + children.push_back(make_uniq(Value::POINTER((uintptr_t)R_ExternalPtrAddr(e->second[0])))); children.push_back( make_uniq(Value::POINTER((uintptr_t)RArrowTabularStreamFactory::Produce))); children.push_back( @@ -234,6 +234,8 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const // make r external ptr object to keep factory around until arrow table is unregistered cpp11::external_pointer factorysexp(stream_factory); + // factorysexp must occur first here, used in ArrowScanReplacement() + cpp11::writable::list state_list = {factorysexp, export_funs, valuesexp}; { lock_guard arrow_scans_lock(conn->db_eptr->lock); auto &arrow_scans = conn->db_eptr->arrow_scans; @@ -242,9 +244,8 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const cpp11::stop("rapi_register_arrow: Arrow table '%s' already registered", name.c_str()); } - arrow_scans[name] = factorysexp; + arrow_scans[name] = state_list; } - cpp11::writable::list state_list = {export_funs, valuesexp, factorysexp}; static_cast(conn->db_eptr).attr("_registered_arrow_" + name) = state_list; } From f0aa3e6a2102d95c4da030d577d900d770b3e0cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 05:45:22 +0100 Subject: [PATCH 09/21] Avoid attribute per registered Arrow scan --- src/register.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/register.cpp b/src/register.cpp index 68b83b3d1..85434d366 100644 --- a/src/register.cpp +++ b/src/register.cpp @@ -246,7 +246,6 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const arrow_scans[name] = state_list; } - static_cast(conn->db_eptr).attr("_registered_arrow_" + name) = state_list; } [[cpp11::register]] void rapi_unregister_arrow(duckdb::conn_eptr_t conn, std::string name) { @@ -258,7 +257,6 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const auto &arrow_scans = conn->db_eptr->arrow_scans; arrow_scans.erase(name); } - static_cast(conn->db_eptr).attr("_registered_arrow_" + name) = R_NilValue; } [[cpp11::register]] cpp11::strings rapi_list_arrow(duckdb::conn_eptr_t conn) { From 35933409ff9c60b7299a52cd473515e466c4394f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 23 Mar 2024 14:02:50 +0100 Subject: [PATCH 10/21] Store config with driver object --- R/Connection.R | 1 + R/Driver.R | 1 + 2 files changed, 2 insertions(+) diff --git a/R/Connection.R b/R/Connection.R index a8d8aa7f7..87fd4daca 100644 --- a/R/Connection.R +++ b/R/Connection.R @@ -7,6 +7,7 @@ #' @export setClass("duckdb_driver", contains = "DBIDriver", slots = list( database_ref = "externalptr", + config = "list", dbdir = "character", read_only = "logical", bigint = "character" diff --git a/R/Driver.R b/R/Driver.R index 9c977cebf..284417efd 100644 --- a/R/Driver.R +++ b/R/Driver.R @@ -50,6 +50,7 @@ duckdb <- function(dbdir = DBDIR_MEMORY, read_only = FALSE, bigint = "numeric", new( "duckdb_driver", + config = config, database_ref = rapi_startup(dbdir, read_only, config), dbdir = dbdir, read_only = read_only, From 5c6653ebc53db423e242423cd5d038b3b437cb84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 23 Mar 2024 14:43:58 +0100 Subject: [PATCH 11/21] Fix inheritance of connection config from driver config --- R/dbConnect__duckdb_driver.R | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/R/dbConnect__duckdb_driver.R b/R/dbConnect__duckdb_driver.R index ddb553d31..463d93b60 100644 --- a/R/dbConnect__duckdb_driver.R +++ b/R/dbConnect__duckdb_driver.R @@ -53,11 +53,18 @@ dbConnect__duckdb_driver <- function( timezone_out <- check_tz(timezone_out) tz_out_convert <- match.arg(tz_out_convert) - missing_dbdir <- missing(dbdir) - dbdir <- path.expand(as.character(dbdir)) + if (missing(dbdir)) { + dbdir <- drv@dbdir + } + + if (missing(read_only)) { + read_only <- drv@read_only + } + + config <- utils::modifyList(drv@config, config) # aha, a late comer. let's make a new instance. - if (!missing_dbdir && dbdir != drv@dbdir) { + if (dbdir != drv@dbdir) { drv <- duckdb(dbdir, read_only, bigint, config) } From e06931877602df034f411126be95780642d67789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 07:11:11 +0100 Subject: [PATCH 12/21] bigint are stored as a connection slot --- R/Connection.R | 8 +++++--- R/Result.R | 2 +- R/dbBind__duckdb_result.R | 2 +- R/dbConnect__duckdb_driver.R | 6 +++++- R/register.R | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/R/Connection.R b/R/Connection.R index 87fd4daca..82f3d226b 100644 --- a/R/Connection.R +++ b/R/Connection.R @@ -26,17 +26,19 @@ setClass("duckdb_connection", contains = "DBIConnection", slots = list( debug = "logical", timezone_out = "character", tz_out_convert = "character", - reserved_words = "character" + reserved_words = "character", + bigint = "character" )) -duckdb_connection <- function(duckdb_driver, debug) { +duckdb_connection <- function(duckdb_driver, debug, bigint) { out <- new( "duckdb_connection", conn_ref = rapi_connect(duckdb_driver@database_ref), driver = duckdb_driver, debug = debug, timezone_out = "UTC", - tz_out_convert = "with" + tz_out_convert = "with", + bigint = bigint ) out@reserved_words <- get_reserved_words(out) out diff --git a/R/Result.R b/R/Result.R index 4b5756af9..be89bf1f2 100644 --- a/R/Result.R +++ b/R/Result.R @@ -40,7 +40,7 @@ duckdb_result <- function(connection, stmt_lst, arrow) { } duckdb_execute <- function(res) { - out <- rapi_execute(res@stmt_lst$ref, res@arrow, res@connection@driver@bigint == "integer64") + out <- rapi_execute(res@stmt_lst$ref, res@arrow, res@connection@bigint == "integer64") duckdb_post_execute(res, out) } diff --git a/R/dbBind__duckdb_result.R b/R/dbBind__duckdb_result.R index bb03e177f..343341bdb 100644 --- a/R/dbBind__duckdb_result.R +++ b/R/dbBind__duckdb_result.R @@ -15,7 +15,7 @@ dbBind__duckdb_result <- function(res, params, ...) { params <- encode_values(params) - out <- rapi_bind(res@stmt_lst$ref, params, res@arrow, res@connection@driver@bigint == "integer64") + out <- rapi_bind(res@stmt_lst$ref, params, res@arrow, res@connection@bigint == "integer64") if (length(out) == 1) { out <- out[[1]] } else if (length(out) == 0) { diff --git a/R/dbConnect__duckdb_driver.R b/R/dbConnect__duckdb_driver.R index 463d93b60..a09036044 100644 --- a/R/dbConnect__duckdb_driver.R +++ b/R/dbConnect__duckdb_driver.R @@ -61,6 +61,10 @@ dbConnect__duckdb_driver <- function( read_only <- drv@read_only } + if (missing(bigint)) { + bigint <- drv@bigint + } + config <- utils::modifyList(drv@config, config) # aha, a late comer. let's make a new instance. @@ -68,7 +72,7 @@ dbConnect__duckdb_driver <- function( drv <- duckdb(dbdir, read_only, bigint, config) } - conn <- duckdb_connection(drv, debug = debug) + conn <- duckdb_connection(drv, debug = debug, bigint = bigint) on.exit(dbDisconnect(conn)) conn@timezone_out <- timezone_out diff --git a/R/register.R b/R/register.R index 58c393f2d..a7a1863e0 100644 --- a/R/register.R +++ b/R/register.R @@ -45,7 +45,7 @@ encode_values <- function(value) { duckdb_register <- function(conn, name, df, overwrite = FALSE, experimental = FALSE) { stopifnot(dbIsValid(conn)) df <- encode_values(as.data.frame(df)) - rapi_register_df(conn@conn_ref, enc2utf8(as.character(name)), df, conn@driver@bigint == "integer64", overwrite, experimental) + rapi_register_df(conn@conn_ref, enc2utf8(as.character(name)), df, conn@bigint == "integer64", overwrite, experimental) invisible(TRUE) } From 27bba58e12d265527e0c64da97dd3ab63d2b2c8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 23 Mar 2024 14:42:59 +0100 Subject: [PATCH 13/21] path_normalize() --- R/Driver.R | 18 ++++++++++++++++++ R/dbConnect__duckdb_driver.R | 6 ++++-- man/duckdb.Rd | 4 ++-- tests/testthat/test-dbinfo.R | 2 +- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/R/Driver.R b/R/Driver.R index 284417efd..7cfce0d0b 100644 --- a/R/Driver.R +++ b/R/Driver.R @@ -48,6 +48,8 @@ duckdb <- function(dbdir = DBDIR_MEMORY, read_only = FALSE, bigint = "numeric", config["secret_directory"] <- file.path(tools::R_user_dir("duckdb", "data"), "stored_secrets") } + dbdir <- path_normalize(dbdir) + new( "duckdb_driver", config = config, @@ -140,3 +142,19 @@ check_tz <- function(timezone) { timezone } + +path_normalize <- function(path) { + if (path == "" || path == DBDIR_MEMORY) { + return(DBDIR_MEMORY) + } + + out <- normalizePath(path, mustWork = FALSE) + + # Stable results are only guaranteed if the file exists + if (!file.exists(out)) { + on.exit(unlink(out)) + writeLines(character(), out) + out <- normalizePath(out, mustWork = TRUE) + } + out +} diff --git a/R/dbConnect__duckdb_driver.R b/R/dbConnect__duckdb_driver.R index a09036044..14200002d 100644 --- a/R/dbConnect__duckdb_driver.R +++ b/R/dbConnect__duckdb_driver.R @@ -4,8 +4,8 @@ #' #' @param drv Object returned by `duckdb()` #' @param dbdir Location for database files. Should be a path to an existing -#' directory in the file system. With the default, all -#' data is kept in RAM +#' directory in the file system. With the default (or `""`), all +#' data is kept in RAM. #' @param ... Ignored #' @param debug Print additional debug information such as queries #' @param read_only Set to `TRUE` for read-only operation @@ -55,6 +55,8 @@ dbConnect__duckdb_driver <- function( if (missing(dbdir)) { dbdir <- drv@dbdir + } else { + dbdir <- path_normalize(dbdir) } if (missing(read_only)) { diff --git a/man/duckdb.Rd b/man/duckdb.Rd index 7f1268fc4..713c52671 100644 --- a/man/duckdb.Rd +++ b/man/duckdb.Rd @@ -38,8 +38,8 @@ duckdb_adbc() } \arguments{ \item{dbdir}{Location for database files. Should be a path to an existing -directory in the file system. With the default, all -data is kept in RAM} +directory in the file system. With the default (or \code{""}), all +data is kept in RAM.} \item{read_only}{Set to \code{TRUE} for read-only operation} diff --git a/tests/testthat/test-dbinfo.R b/tests/testthat/test-dbinfo.R index 52118d483..bed830b54 100644 --- a/tests/testthat/test-dbinfo.R +++ b/tests/testthat/test-dbinfo.R @@ -1,5 +1,5 @@ test_that("dbGetInfo returns something meaningful", { - dbdir <- tempfile() + dbdir <- path_normalize(tempfile()) drv <- duckdb(dbdir) info_drv <- dbGetInfo(drv) From bc94f3ca69d7bf9d65a7dbf31305e59dc4b5351a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 23 Mar 2024 20:40:53 +0100 Subject: [PATCH 14/21] driver_registry --- R/Driver.R | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/R/Driver.R b/R/Driver.R index 7cfce0d0b..2f2ba23cf 100644 --- a/R/Driver.R +++ b/R/Driver.R @@ -18,6 +18,8 @@ drv_to_string <- function(drv) { sprintf("", drv@dbdir, drv@read_only, drv@bigint) } +driver_registry <- new.env(parent = emptyenv()) + #' @description #' `duckdb()` creates or reuses a database instance. #' @@ -49,8 +51,17 @@ duckdb <- function(dbdir = DBDIR_MEMORY, read_only = FALSE, bigint = "numeric", } dbdir <- path_normalize(dbdir) + if (dbdir != DBDIR_MEMORY) { + drv <- driver_registry[[dbdir]] + if (!is.null(drv)) { + # FIXME: Check that readonly and config are identical + return(drv) + } + } - new( + # Always create new database for in-memory, + # allows mixing different configs + drv <- new( "duckdb_driver", config = config, database_ref = rapi_startup(dbdir, read_only, config), @@ -58,6 +69,11 @@ duckdb <- function(dbdir = DBDIR_MEMORY, read_only = FALSE, bigint = "numeric", read_only = read_only, bigint = bigint ) + + if (dbdir != DBDIR_MEMORY) { + driver_registry[[dbdir]] <- drv + } + drv } #' @description @@ -76,6 +92,11 @@ duckdb_shutdown <- function(drv) { invisible(FALSE) } rapi_shutdown(drv@database_ref) + + if (drv@dbdir != DBDIR_MEMORY) { + rm(list = drv@dbdir, envir = driver_registry) + } + invisible(TRUE) } From c3839595c6ebe103d0a235dcbb9ab859e1063a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 06:04:11 +0100 Subject: [PATCH 15/21] db_eptr stores dual pointer --- R/cpp11.R | 4 +-- src/connection.cpp | 11 +++++--- src/cpp11.cpp | 6 ++--- src/database.cpp | 11 +++----- src/include/rapi.hpp | 60 +++++++++++++++++++++++++++++++++++++++++--- src/register.cpp | 12 ++++----- 6 files changed, 80 insertions(+), 24 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 23a7237ca..b9c37d702 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -1,7 +1,7 @@ # Generated by cpp11: do not edit by hand -rapi_connect <- function(db) { - .Call(`_duckdb_rapi_connect`, db) +rapi_connect <- function(dual) { + .Call(`_duckdb_rapi_connect`, dual) } rapi_disconnect <- function(conn) { diff --git a/src/connection.cpp b/src/connection.cpp index d5c4207d7..4f7cc7fbe 100644 --- a/src/connection.cpp +++ b/src/connection.cpp @@ -7,13 +7,18 @@ void duckdb::ConnDeleter(ConnWrapper *conn) { delete conn; } -[[cpp11::register]] duckdb::conn_eptr_t rapi_connect(duckdb::db_eptr_t db) { - if (!db || !db.get() || !db->db) { +[[cpp11::register]] duckdb::conn_eptr_t rapi_connect(duckdb::db_eptr_t dual) { + if (!dual || !dual.get()) { cpp11::stop("rapi_connect: Invalid database reference"); } + auto db = dual->get(); + if (!db || !db->db) { + cpp11::stop("rapi_connect: Database already closed"); + } + auto conn_wrapper = new ConnWrapper(); conn_wrapper->conn = make_uniq(*db->db); - conn_wrapper->db_eptr.swap(db); + conn_wrapper->db.swap(db); return conn_eptr_t(conn_wrapper); } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index fefd5aada..d5457d865 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -6,10 +6,10 @@ #include // connection.cpp -duckdb::conn_eptr_t rapi_connect(duckdb::db_eptr_t db); -extern "C" SEXP _duckdb_rapi_connect(SEXP db) { +duckdb::conn_eptr_t rapi_connect(duckdb::db_eptr_t dual); +extern "C" SEXP _duckdb_rapi_connect(SEXP dual) { BEGIN_CPP11 - return cpp11::as_sexp(rapi_connect(cpp11::as_cpp>(db))); + return cpp11::as_sexp(rapi_connect(cpp11::as_cpp>(dual))); END_CPP11 } // connection.cpp diff --git a/src/database.cpp b/src/database.cpp index 3e6693266..2afe77c72 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -8,12 +8,6 @@ using namespace duckdb; -void duckdb::DBDeleter(DBWrapper *db) { - cpp11::warning("Database is garbage-collected, use dbDisconnect(con, shutdown=TRUE) or " - "duckdb::duckdb_shutdown(drv) to avoid this."); - delete db; -} - static bool CastRstringToVarchar(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { GenericExecutor::ExecuteUnary, PrimitiveType>( source, result, count, @@ -77,12 +71,15 @@ static bool CastRstringToVarchar(Vector &source, Vector &result, idx_t count, Ca context.transaction.Commit(); - return db_eptr_t(wrapper); + auto dual = new DBWrapperDual(wrapper); + + return db_eptr_t(dual); } [[cpp11::register]] void rapi_shutdown(duckdb::db_eptr_t dbsexp) { auto db_wrapper = dbsexp.release(); if (db_wrapper) { + db_wrapper->reset(); delete db_wrapper; } } diff --git a/src/include/rapi.hpp b/src/include/rapi.hpp index 9f6a3713b..055ac6767 100644 --- a/src/include/rapi.hpp +++ b/src/include/rapi.hpp @@ -26,12 +26,66 @@ struct DBWrapper { mutex lock; }; -void DBDeleter(DBWrapper *); -typedef cpp11::external_pointer db_eptr_t; +template +class DualWrapper { +public: + DualWrapper(T *db) : precious_(db) {} + DualWrapper(std::shared_ptr db) : precious_(db) {} + DualWrapper(DualWrapper *dual) : precious_(dual->get()) { + if (!precious_) { + cpp11::stop("dual is already released"); + } + } + ~DualWrapper() { + if (has()) { + cpp11::warning("Database is garbage-collected, use dbDisconnect(con, shutdown=TRUE) or " + "duckdb::duckdb_shutdown(drv) to avoid this."); + } + } + + std::shared_ptr get() const { + if (precious_) { + return precious_; + } else { + return disposable_.lock(); + } + } + + std::shared_ptr operator->() const { + return get(); + } + + bool has() const { + return !!get(); + } + + void lock() { + precious_ = get(); + disposable_.reset(); + } + + void unlock() { + disposable_ = get(); + precious_.reset(); + } + + void reset() { + precious_.reset(); + disposable_.reset(); + } + +private: + std::shared_ptr precious_; + std::weak_ptr disposable_; +}; + +typedef DualWrapper DBWrapperDual; + +typedef cpp11::external_pointer db_eptr_t; struct ConnWrapper { duckdb::unique_ptr conn; - db_eptr_t db_eptr; + std::shared_ptr db; }; void ConnDeleter(ConnWrapper *); diff --git a/src/register.cpp b/src/register.cpp index 85434d366..e470376e8 100644 --- a/src/register.cpp +++ b/src/register.cpp @@ -237,8 +237,8 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const // factorysexp must occur first here, used in ArrowScanReplacement() cpp11::writable::list state_list = {factorysexp, export_funs, valuesexp}; { - lock_guard arrow_scans_lock(conn->db_eptr->lock); - auto &arrow_scans = conn->db_eptr->arrow_scans; + lock_guard arrow_scans_lock(conn->db->lock); + auto &arrow_scans = conn->db->arrow_scans; for (auto e = arrow_scans.find(name); e != arrow_scans.end(); ++e) { cpp11::stop("rapi_register_arrow: Arrow table '%s' already registered", name.c_str()); @@ -253,15 +253,15 @@ unique_ptr duckdb::ArrowScanReplacement(ClientContext &context, const return; // if the connection is already dead there is probably no point in cleaning this } { - lock_guard arrow_scans_lock(conn->db_eptr->lock); - auto &arrow_scans = conn->db_eptr->arrow_scans; + lock_guard arrow_scans_lock(conn->db->lock); + auto &arrow_scans = conn->db->arrow_scans; arrow_scans.erase(name); } } [[cpp11::register]] cpp11::strings rapi_list_arrow(duckdb::conn_eptr_t conn) { - lock_guard arrow_scans_lock(conn->db_eptr->lock); - const auto &arrow_scans = conn->db_eptr->arrow_scans; + lock_guard arrow_scans_lock(conn->db->lock); + const auto &arrow_scans = conn->db->arrow_scans; cpp11::writable::strings names; names.reserve(arrow_scans.size()); From ec1852141bfb4607236cd2abc9e927aa7b8bf985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 07:45:09 +0100 Subject: [PATCH 16/21] rapi_lock() and rapi_is_locked() --- R/Driver.R | 6 ++++-- R/cpp11.R | 8 ++++++++ R/dbConnect__duckdb_driver.R | 2 +- R/dbDisconnect__duckdb_connection.R | 12 +++++------- R/dbIsValid__duckdb_driver.R | 6 ++++++ man/duckdb.Rd | 9 +++++---- src/connection.cpp | 6 ++++++ src/cpp11.cpp | 16 ++++++++++++++++ src/database.cpp | 15 +++++++++++++++ src/include/rapi.hpp | 6 +++++- tests/testthat/helper-DBItest.R | 6 ++++-- 11 files changed, 75 insertions(+), 17 deletions(-) diff --git a/R/Driver.R b/R/Driver.R index 2f2ba23cf..de6125bc6 100644 --- a/R/Driver.R +++ b/R/Driver.R @@ -53,14 +53,16 @@ duckdb <- function(dbdir = DBDIR_MEMORY, read_only = FALSE, bigint = "numeric", dbdir <- path_normalize(dbdir) if (dbdir != DBDIR_MEMORY) { drv <- driver_registry[[dbdir]] - if (!is.null(drv)) { + # We reuse an existing driver object if the database is still alive. + # If not, we fall back to creating a new driver object with a new database. + if (!is.null(drv) && rapi_lock(drv@database_ref)) { # FIXME: Check that readonly and config are identical return(drv) } } # Always create new database for in-memory, - # allows mixing different configs + # allows isolation and mixing different configs drv <- new( "duckdb_driver", config = config, diff --git a/R/cpp11.R b/R/cpp11.R index b9c37d702..dcefa2f78 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -12,6 +12,14 @@ rapi_startup <- function(dbdir, readonly, configsexp) { .Call(`_duckdb_rapi_startup`, dbdir, readonly, configsexp) } +rapi_lock <- function(dual) { + .Call(`_duckdb_rapi_lock`, dual) +} + +rapi_is_locked <- function(dual) { + .Call(`_duckdb_rapi_is_locked`, dual) +} + rapi_shutdown <- function(dbsexp) { invisible(.Call(`_duckdb_rapi_shutdown`, dbsexp)) } diff --git a/R/dbConnect__duckdb_driver.R b/R/dbConnect__duckdb_driver.R index 14200002d..581859c53 100644 --- a/R/dbConnect__duckdb_driver.R +++ b/R/dbConnect__duckdb_driver.R @@ -70,7 +70,7 @@ dbConnect__duckdb_driver <- function( config <- utils::modifyList(drv@config, config) # aha, a late comer. let's make a new instance. - if (dbdir != drv@dbdir) { + if (dbdir != drv@dbdir || !rapi_lock(drv@database_ref)) { drv <- duckdb(dbdir, read_only, bigint, config) } diff --git a/R/dbDisconnect__duckdb_connection.R b/R/dbDisconnect__duckdb_connection.R index 61678d586..37a307b91 100644 --- a/R/dbDisconnect__duckdb_connection.R +++ b/R/dbDisconnect__duckdb_connection.R @@ -1,20 +1,18 @@ #' @description -#' `dbDisconnect()` closes a DuckDB database connection, optionally shutting down -#' the associated instance. +#' `dbDisconnect()` closes a DuckDB database connection. +#' The associated DuckDB database instance is shut down automatically, +#' it is no longer necessary to set `shutdown = TRUE` or to call `duckdb_shutdown()`. #' #' @param conn A `duckdb_connection` object -#' @param shutdown Set to `TRUE` to shut down the DuckDB database instance that this connection refers to. +#' @param shutdown Unused. The database instance is shut down automatically. #' @rdname duckdb #' @usage NULL -dbDisconnect__duckdb_connection <- function(conn, ..., shutdown = FALSE) { +dbDisconnect__duckdb_connection <- function(conn, ..., shutdown = TRUE) { if (!dbIsValid(conn)) { warning("Connection already closed.", call. = FALSE) invisible(FALSE) } rapi_disconnect(conn@conn_ref) - if (shutdown) { - duckdb_shutdown(conn@driver) - } rs_on_connection_closed(conn) invisible(TRUE) } diff --git a/R/dbIsValid__duckdb_driver.R b/R/dbIsValid__duckdb_driver.R index fa324df23..9ca5b8a72 100644 --- a/R/dbIsValid__duckdb_driver.R +++ b/R/dbIsValid__duckdb_driver.R @@ -5,7 +5,13 @@ dbIsValid__duckdb_driver <- function(dbObj, ...) { valid <- FALSE tryCatch( { + was_locked <- rapi_is_locked(dbObj@database_ref) con <- dbConnect(dbObj) + # Keep driver alive, but only if needed + if (was_locked) { + rapi_lock(dbObj@database_ref) + } + dbExecute(con, SQL("SELECT 1")) dbDisconnect(con) valid <- TRUE diff --git a/man/duckdb.Rd b/man/duckdb.Rd index 713c52671..ef358114c 100644 --- a/man/duckdb.Rd +++ b/man/duckdb.Rd @@ -34,7 +34,7 @@ duckdb_adbc() bigint = "numeric" ) -\S4method{dbDisconnect}{duckdb_connection}(conn, ..., shutdown = FALSE) +\S4method{dbDisconnect}{duckdb_connection}(conn, ..., shutdown = TRUE) } \arguments{ \item{dbdir}{Location for database files. Should be a path to an existing @@ -66,7 +66,7 @@ time as the timestamp in the database, but with the new time zone.} \item{conn}{A \code{duckdb_connection} object} -\item{shutdown}{Set to \code{TRUE} to shut down the DuckDB database instance that this connection refers to.} +\item{shutdown}{Unused. The database instance is shut down automatically.} } \value{ \code{duckdb()} returns an object of class \linkS4class{duckdb_driver}. @@ -89,8 +89,9 @@ Connectivity via the adbcdrivermanager package. \code{dbConnect()} connects to a database instance. -\code{dbDisconnect()} closes a DuckDB database connection, optionally shutting down -the associated instance. +\code{dbDisconnect()} closes a DuckDB database connection. +The associated DuckDB database instance is shut down automatically, +it is no longer necessary to set \code{shutdown = TRUE} or to call \code{duckdb_shutdown()}. } \examples{ \dontshow{if (requireNamespace("adbcdrivermanager", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/src/connection.cpp b/src/connection.cpp index 4f7cc7fbe..184a5c2fd 100644 --- a/src/connection.cpp +++ b/src/connection.cpp @@ -20,6 +20,12 @@ void duckdb::ConnDeleter(ConnWrapper *conn) { conn_wrapper->conn = make_uniq(*db->db); conn_wrapper->db.swap(db); + // The connection now holds a reference to the database. + // This reference is released when the connection is closed. + // From the R side, the database pointer will remain valid + // as long as at least one connection to that database is open. + dual->unlock(); + return conn_eptr_t(conn_wrapper); } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index d5457d865..7c9c811f6 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -28,6 +28,20 @@ extern "C" SEXP _duckdb_rapi_startup(SEXP dbdir, SEXP readonly, SEXP configsexp) END_CPP11 } // database.cpp +bool rapi_lock(duckdb::db_eptr_t dual); +extern "C" SEXP _duckdb_rapi_lock(SEXP dual) { + BEGIN_CPP11 + return cpp11::as_sexp(rapi_lock(cpp11::as_cpp>(dual))); + END_CPP11 +} +// database.cpp +bool rapi_is_locked(duckdb::db_eptr_t dual); +extern "C" SEXP _duckdb_rapi_is_locked(SEXP dual) { + BEGIN_CPP11 + return cpp11::as_sexp(rapi_is_locked(cpp11::as_cpp>(dual))); + END_CPP11 +} +// database.cpp void rapi_shutdown(duckdb::db_eptr_t dbsexp); extern "C" SEXP _duckdb_rapi_shutdown(SEXP dbsexp) { BEGIN_CPP11 @@ -403,7 +417,9 @@ static const R_CallMethodDef CallEntries[] = { {"_duckdb_rapi_get_null_SEXP_ptr", (DL_FUNC) &_duckdb_rapi_get_null_SEXP_ptr, 0}, {"_duckdb_rapi_get_substrait", (DL_FUNC) &_duckdb_rapi_get_substrait, 3}, {"_duckdb_rapi_get_substrait_json", (DL_FUNC) &_duckdb_rapi_get_substrait_json, 3}, + {"_duckdb_rapi_is_locked", (DL_FUNC) &_duckdb_rapi_is_locked, 1}, {"_duckdb_rapi_list_arrow", (DL_FUNC) &_duckdb_rapi_list_arrow, 1}, + {"_duckdb_rapi_lock", (DL_FUNC) &_duckdb_rapi_lock, 1}, {"_duckdb_rapi_prepare", (DL_FUNC) &_duckdb_rapi_prepare, 2}, {"_duckdb_rapi_prepare_substrait", (DL_FUNC) &_duckdb_rapi_prepare_substrait, 2}, {"_duckdb_rapi_prepare_substrait_json", (DL_FUNC) &_duckdb_rapi_prepare_substrait_json, 2}, diff --git a/src/database.cpp b/src/database.cpp index 2afe77c72..d6759535d 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -76,6 +76,21 @@ static bool CastRstringToVarchar(Vector &source, Vector &result, idx_t count, Ca return db_eptr_t(dual); } +[[cpp11::register]] bool rapi_lock(duckdb::db_eptr_t dual) { + if (!dual || !dual.get()) { + cpp11::stop("rapi_lock: Invalid database reference"); + } + dual->lock(); + return dual->has(); +} + +[[cpp11::register]] bool rapi_is_locked(duckdb::db_eptr_t dual) { + if (!dual || !dual.get()) { + cpp11::stop("rapi_is_locked: Invalid database reference"); + } + return dual->is_locked(); +} + [[cpp11::register]] void rapi_shutdown(duckdb::db_eptr_t dbsexp) { auto db_wrapper = dbsexp.release(); if (db_wrapper) { diff --git a/src/include/rapi.hpp b/src/include/rapi.hpp index 055ac6767..218af0178 100644 --- a/src/include/rapi.hpp +++ b/src/include/rapi.hpp @@ -38,7 +38,7 @@ class DualWrapper { } ~DualWrapper() { if (has()) { - cpp11::warning("Database is garbage-collected, use dbDisconnect(con, shutdown=TRUE) or " + cpp11::warning("Database is garbage-collected, use dbConnect(duckdb()) with dbDisconnect(), or " "duckdb::duckdb_shutdown(drv) to avoid this."); } } @@ -59,6 +59,10 @@ class DualWrapper { return !!get(); } + bool is_locked() const { + return !!precious_; + } + void lock() { precious_ = get(); disposable_.reset(); diff --git a/tests/testthat/helper-DBItest.R b/tests/testthat/helper-DBItest.R index dcd6607d4..383ee1a75 100644 --- a/tests/testthat/helper-DBItest.R +++ b/tests/testthat/helper-DBItest.R @@ -1,5 +1,7 @@ -drv <- duckdb() -reg.finalizer(drv@database_ref, function(x) rapi_shutdown(x)) +drv <- duckdb(tempfile(fileext = ".duckdb")) +reg.finalizer(drv@database_ref, function(x) { + unlink(drv@dbdir, force = TRUE) +}) # remotes::install_github("r-dbi/dblog") # Then, use DBItest::test_some() to see the DBI calls emitted by the tests From bb0308d0e6d643da337a18065bc36c8d896d2198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 10:20:18 +0100 Subject: [PATCH 17/21] Results, fixed now --- revdep/README.md | 6 +- revdep/cran.md | 4 +- revdep/problems.md | 387 ++++----------------------------------------- 3 files changed, 32 insertions(+), 365 deletions(-) diff --git a/revdep/README.md b/revdep/README.md index 746231d89..53f8b152d 100644 --- a/revdep/README.md +++ b/revdep/README.md @@ -2,7 +2,7 @@ ## New problems (1) -|package |version |error |warning |note | -|:---------------|:-------|:------|:-------|:----| -|[PatientProfiles](problems.md#patientprofiles)|0.6.2 |__+1__ | | | +|package |version |error |warning |note | +|:----------|:-------|:------|:-------|:----| +|[starwarsdb](problems.md#starwarsdb)|0.1.2 |__+1__ | |1 | diff --git a/revdep/cran.md b/revdep/cran.md index 6db29a99e..66a06c641 100644 --- a/revdep/cran.md +++ b/revdep/cran.md @@ -1,6 +1,6 @@ ## revdepcheck results -We checked 34 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. +We checked 39 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. * We saw 1 new problems * We failed to check 0 packages @@ -10,6 +10,6 @@ Issues with CRAN packages are summarised below. ### New problems (This reports the first line of each new failure) -* PatientProfiles +* starwarsdb checking tests ... ERROR diff --git a/revdep/problems.md b/revdep/problems.md index 7d619a7ca..f82845ef2 100644 --- a/revdep/problems.md +++ b/revdep/problems.md @@ -1,14 +1,14 @@ -# PatientProfiles +# starwarsdb
-* Version: 0.6.2 -* GitHub: NA -* Source code: https://github.com/cran/PatientProfiles -* Date/Publication: 2024-03-05 18:50:03 UTC -* Number of recursive dependencies: 174 +* Version: 0.1.2 +* GitHub: https://github.com/gadenbuie/starwarsdb +* Source code: https://github.com/cran/starwarsdb +* Date/Publication: 2020-11-02 23:50:02 UTC +* Number of recursive dependencies: 51 -Run `revdepcheck::cloud_details(, "PatientProfiles")` for more info +Run `revdepcheck::cloud_details(, "starwarsdb")` for more info
@@ -16,368 +16,35 @@ Run `revdepcheck::cloud_details(, "PatientProfiles")` for more info * checking tests ... ERROR ``` - Running ‘spelling.R’ Running ‘testthat.R’ Running the tests in ‘tests/testthat.R’ failed. Complete output: - > # This file is part of the standard setup for testthat. - > # It is recommended that you do not modify it. - > # - > # Where should you do additional test configuration? - > # Learn more about the roles of various files in: - > # * https://r-pkgs.org/tests.html - > # * https://testthat.r-lib.org/reference/test_package.html#special-files - > > library(testthat) - > library(PatientProfiles) + > library(starwarsdb) > - > test_check("PatientProfiles") - Starting 2 test processes - [ FAIL 8 | WARN 38 | SKIP 1 | PASS 644 ] - - ══ Skipped tests (1) ═══════════════════════════════════════════════════════════ - • empty test (1): 'test-summariseResult.R:209:1' + > test_check("starwarsdb") + [ FAIL 1 | WARN 1 | SKIP 0 | PASS 32 ] ══ Failed tests ════════════════════════════════════════════════════════════════ - ── Error ('test-addFutureObservation.R:126:3'): check working example with condition_occurrence ── - Error in `dplyr::collect(removeClass(x, "cdm_table"))`: Failed to collect lazy table. - Caused by error: - ! rapi_prepare: Failed to prepare query SELECT cohort1.* - FROM main.cohort1 - WHERE NOT EXISTS ( - SELECT 1 FROM ( - SELECT cohort1.*, observation_period_start_date, observation_period_end_date - FROM main.cohort1 - LEFT JOIN main.observation_period - ON (cohort1.subject_id = observation_period.person_id) - ) RHS - WHERE - (cohort1.cohort_definition_id = RHS.cohort_definition_id) AND - (cohort1.subject_id = RHS.subject_id) AND - (cohort1.cohort_start_date = RHS.cohort_start_date) AND - (cohort1.cohort_end_date = RHS.cohort_end_date) AND - (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - ) - Error: Binder Error: Cannot compare values of type VARCHAR and type DATE - an explicit cast is required - LINE 15: (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - )... - ^ - Backtrace: - ▆ - 1. └─PatientProfiles::mockPatientProfiles(...) at test-addFutureObservation.R:126:3 - 2. └─omopgenerics::newCohortTable(cdm[[cohort]]) - 3. └─omopgenerics:::validateGeneratedCohortSet(cohort, soft = .softValidation) - 4. └─omopgenerics::checkCohortRequirements(...) - 5. └─omopgenerics:::checkObservationPeriod(...) - 6. ├─dplyr::collect(...) - 7. └─omopgenerics:::collect.cohort_table(...) - 8. ├─dplyr::collect(x) - 9. └─omopgenerics:::collect.cdm_table(x) - 10. ├─dplyr::collect(removeClass(x, "cdm_table")) - 11. └─dbplyr:::collect.tbl_sql(removeClass(x, "cdm_table")) - 12. └─base::tryCatch(...) - 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers) - 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]]) - 15. └─value[[3L]](cond) - 16. └─cli::cli_abort("Failed to collect lazy table.", parent = cnd) - 17. └─rlang::abort(...) - ── Error ('test-addFutureObservation.R:198:3'): different name ───────────────── - Error in `dplyr::collect(removeClass(x, "cdm_table"))`: Failed to collect lazy table. - Caused by error: - ! rapi_prepare: Failed to prepare query SELECT cohort1.* - FROM main.cohort1 - WHERE NOT EXISTS ( - SELECT 1 FROM ( - SELECT cohort1.*, observation_period_start_date, observation_period_end_date - FROM main.cohort1 - LEFT JOIN main.observation_period - ON (cohort1.subject_id = observation_period.person_id) - ) RHS - WHERE - (cohort1.cohort_definition_id = RHS.cohort_definition_id) AND - (cohort1.subject_id = RHS.subject_id) AND - (cohort1.cohort_start_date = RHS.cohort_start_date) AND - (cohort1.cohort_end_date = RHS.cohort_end_date) AND - (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - ) - Error: Binder Error: Cannot compare values of type VARCHAR and type DATE - an explicit cast is required - LINE 15: (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - )... - ^ - Backtrace: - ▆ - 1. └─PatientProfiles::mockPatientProfiles(...) at test-addFutureObservation.R:198:3 - 2. └─omopgenerics::newCohortTable(cdm[[cohort]]) - 3. └─omopgenerics:::validateGeneratedCohortSet(cohort, soft = .softValidation) - 4. └─omopgenerics::checkCohortRequirements(...) - 5. └─omopgenerics:::checkObservationPeriod(...) - 6. ├─dplyr::collect(...) - 7. └─omopgenerics:::collect.cohort_table(...) - 8. ├─dplyr::collect(x) - 9. └─omopgenerics:::collect.cdm_table(x) - 10. ├─dplyr::collect(removeClass(x, "cdm_table")) - 11. └─dbplyr:::collect.tbl_sql(removeClass(x, "cdm_table")) - 12. └─base::tryCatch(...) - 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers) - 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]]) - 15. └─value[[3L]](cond) - 16. └─cli::cli_abort("Failed to collect lazy table.", parent = cnd) - 17. └─rlang::abort(...) - ── Error ('test-addFutureObservation.R:251:3'): priorHistory and future_observation - outside of observation period ── - Error in `dplyr::collect(removeClass(x, "cdm_table"))`: Failed to collect lazy table. - Caused by error: - ! rapi_prepare: Failed to prepare query SELECT cohort1.* - FROM main.cohort1 - WHERE NOT EXISTS ( - SELECT 1 FROM ( - SELECT cohort1.*, observation_period_start_date, observation_period_end_date - FROM main.cohort1 - LEFT JOIN main.observation_period - ON (cohort1.subject_id = observation_period.person_id) - ) RHS - WHERE - (cohort1.cohort_definition_id = RHS.cohort_definition_id) AND - (cohort1.subject_id = RHS.subject_id) AND - (cohort1.cohort_start_date = RHS.cohort_start_date) AND - (cohort1.cohort_end_date = RHS.cohort_end_date) AND - (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - ) - Error: Binder Error: Cannot compare values of type VARCHAR and type DATE - an explicit cast is required - LINE 15: (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - )... - ^ - Backtrace: - ▆ - 1. └─PatientProfiles::mockPatientProfiles(...) at test-addFutureObservation.R:251:3 - 2. └─omopgenerics::newCohortTable(cdm[[cohort]]) - 3. └─omopgenerics:::validateGeneratedCohortSet(cohort, soft = .softValidation) - 4. └─omopgenerics::checkCohortRequirements(...) - 5. └─omopgenerics:::checkObservationPeriod(...) - 6. ├─dplyr::collect(...) - 7. └─omopgenerics:::collect.cohort_table(...) - 8. ├─dplyr::collect(x) - 9. └─omopgenerics:::collect.cdm_table(x) - 10. ├─dplyr::collect(removeClass(x, "cdm_table")) - 11. └─dbplyr:::collect.tbl_sql(removeClass(x, "cdm_table")) - 12. └─base::tryCatch(...) - 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers) - 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]]) - 15. └─value[[3L]](cond) - 16. └─cli::cli_abort("Failed to collect lazy table.", parent = cnd) - 17. └─rlang::abort(...) - ── Error ('test-addFutureObservation.R:301:3'): multiple observation periods ─── - Error in `dplyr::collect(removeClass(x, "cdm_table"))`: Failed to collect lazy table. - Caused by error: - ! rapi_prepare: Failed to prepare query SELECT cohort2.* - FROM main.cohort2 - WHERE NOT EXISTS ( - SELECT 1 FROM ( - SELECT cohort2.*, observation_period_start_date, observation_period_end_date - FROM main.cohort2 - LEFT JOIN main.observation_period - ON (cohort2.subject_id = observation_period.person_id) - ) RHS - WHERE - (cohort2.cohort_definition_id = RHS.cohort_definition_id) AND - (cohort2.subject_id = RHS.subject_id) AND - (cohort2.cohort_start_date = RHS.cohort_start_date) AND - (cohort2.cohort_end_date = RHS.cohort_end_date) AND - (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - ) - Error: Binder Error: Cannot compare values of type VARCHAR and type DATE - an explicit cast is required - LINE 15: (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - )... - ^ - Backtrace: - ▆ - 1. └─PatientProfiles::mockPatientProfiles(...) at test-addFutureObservation.R:301:3 - 2. └─omopgenerics::newCohortTable(cdm[[cohort]]) - 3. └─omopgenerics:::validateGeneratedCohortSet(cohort, soft = .softValidation) - 4. └─omopgenerics::checkCohortRequirements(...) - 5. └─omopgenerics:::checkObservationPeriod(...) - 6. ├─dplyr::collect(...) - 7. └─omopgenerics:::collect.cohort_table(...) - 8. ├─dplyr::collect(x) - 9. └─omopgenerics:::collect.cdm_table(x) - 10. ├─dplyr::collect(removeClass(x, "cdm_table")) - 11. └─dbplyr:::collect.tbl_sql(removeClass(x, "cdm_table")) - 12. └─base::tryCatch(...) - 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers) - 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]]) - 15. └─value[[3L]](cond) - 16. └─cli::cli_abort("Failed to collect lazy table.", parent = cnd) - 17. └─rlang::abort(...) - ── Error ('test-addPriorObservation.R:106:3'): check working example with condition_occurrence ── - Error in `dplyr::collect(removeClass(x, "cdm_table"))`: Failed to collect lazy table. - Caused by error: - ! rapi_prepare: Failed to prepare query SELECT cohort1.* - FROM main.cohort1 - WHERE NOT EXISTS ( - SELECT 1 FROM ( - SELECT cohort1.*, observation_period_start_date, observation_period_end_date - FROM main.cohort1 - LEFT JOIN main.observation_period - ON (cohort1.subject_id = observation_period.person_id) - ) RHS - WHERE - (cohort1.cohort_definition_id = RHS.cohort_definition_id) AND - (cohort1.subject_id = RHS.subject_id) AND - (cohort1.cohort_start_date = RHS.cohort_start_date) AND - (cohort1.cohort_end_date = RHS.cohort_end_date) AND - (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - ) - Error: Binder Error: Cannot compare values of type VARCHAR and type DATE - an explicit cast is required - LINE 15: (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - )... - ^ - Backtrace: - ▆ - 1. └─PatientProfiles::mockPatientProfiles(...) at test-addPriorObservation.R:106:3 - 2. └─omopgenerics::newCohortTable(cdm[[cohort]]) - 3. └─omopgenerics:::validateGeneratedCohortSet(cohort, soft = .softValidation) - 4. └─omopgenerics::checkCohortRequirements(...) - 5. └─omopgenerics:::checkObservationPeriod(...) - 6. ├─dplyr::collect(...) - 7. └─omopgenerics:::collect.cohort_table(...) - 8. ├─dplyr::collect(x) - 9. └─omopgenerics:::collect.cdm_table(x) - 10. ├─dplyr::collect(removeClass(x, "cdm_table")) - 11. └─dbplyr:::collect.tbl_sql(removeClass(x, "cdm_table")) - 12. └─base::tryCatch(...) - 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers) - 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]]) - 15. └─value[[3L]](cond) - 16. └─cli::cli_abort("Failed to collect lazy table.", parent = cnd) - 17. └─rlang::abort(...) - ── Error ('test-addPriorObservation.R:158:3'): different name ────────────────── - Error in `dplyr::collect(removeClass(x, "cdm_table"))`: Failed to collect lazy table. - Caused by error: - ! rapi_prepare: Failed to prepare query SELECT cohort1.* - FROM main.cohort1 - WHERE NOT EXISTS ( - SELECT 1 FROM ( - SELECT cohort1.*, observation_period_start_date, observation_period_end_date - FROM main.cohort1 - LEFT JOIN main.observation_period - ON (cohort1.subject_id = observation_period.person_id) - ) RHS - WHERE - (cohort1.cohort_definition_id = RHS.cohort_definition_id) AND - (cohort1.subject_id = RHS.subject_id) AND - (cohort1.cohort_start_date = RHS.cohort_start_date) AND - (cohort1.cohort_end_date = RHS.cohort_end_date) AND - (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - ) - Error: Binder Error: Cannot compare values of type VARCHAR and type DATE - an explicit cast is required - LINE 15: (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - )... - ^ - Backtrace: - ▆ - 1. └─PatientProfiles::mockPatientProfiles(...) at test-addPriorObservation.R:158:3 - 2. └─omopgenerics::newCohortTable(cdm[[cohort]]) - 3. └─omopgenerics:::validateGeneratedCohortSet(cohort, soft = .softValidation) - 4. └─omopgenerics::checkCohortRequirements(...) - 5. └─omopgenerics:::checkObservationPeriod(...) - 6. ├─dplyr::collect(...) - 7. └─omopgenerics:::collect.cohort_table(...) - 8. ├─dplyr::collect(x) - 9. └─omopgenerics:::collect.cdm_table(x) - 10. ├─dplyr::collect(removeClass(x, "cdm_table")) - 11. └─dbplyr:::collect.tbl_sql(removeClass(x, "cdm_table")) - 12. └─base::tryCatch(...) - 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers) - 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]]) - 15. └─value[[3L]](cond) - 16. └─cli::cli_abort("Failed to collect lazy table.", parent = cnd) - 17. └─rlang::abort(...) - ── Error ('test-addPriorObservation.R:209:3'): multiple observation periods ──── - Error in `dplyr::collect(removeClass(x, "cdm_table"))`: Failed to collect lazy table. - Caused by error: - ! rapi_prepare: Failed to prepare query SELECT cohort2.* - FROM main.cohort2 - WHERE NOT EXISTS ( - SELECT 1 FROM ( - SELECT cohort2.*, observation_period_start_date, observation_period_end_date - FROM main.cohort2 - LEFT JOIN main.observation_period - ON (cohort2.subject_id = observation_period.person_id) - ) RHS - WHERE - (cohort2.cohort_definition_id = RHS.cohort_definition_id) AND - (cohort2.subject_id = RHS.subject_id) AND - (cohort2.cohort_start_date = RHS.cohort_start_date) AND - (cohort2.cohort_end_date = RHS.cohort_end_date) AND - (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - ) - Error: Binder Error: Cannot compare values of type VARCHAR and type DATE - an explicit cast is required - LINE 15: (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - )... - ^ - Backtrace: - ▆ - 1. └─PatientProfiles::mockPatientProfiles(...) at test-addPriorObservation.R:209:3 - 2. └─omopgenerics::newCohortTable(cdm[[cohort]]) - 3. └─omopgenerics:::validateGeneratedCohortSet(cohort, soft = .softValidation) - 4. └─omopgenerics::checkCohortRequirements(...) - 5. └─omopgenerics:::checkObservationPeriod(...) - 6. ├─dplyr::collect(...) - 7. └─omopgenerics:::collect.cohort_table(...) - 8. ├─dplyr::collect(x) - 9. └─omopgenerics:::collect.cdm_table(x) - 10. ├─dplyr::collect(removeClass(x, "cdm_table")) - 11. └─dbplyr:::collect.tbl_sql(removeClass(x, "cdm_table")) - 12. └─base::tryCatch(...) - 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers) - 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]]) - 15. └─value[[3L]](cond) - 16. └─cli::cli_abort("Failed to collect lazy table.", parent = cnd) - 17. └─rlang::abort(...) - ── Error ('test-summariseCharacteristics.R:51:3'): test summariseCharacteristics ── - Error in `dplyr::collect(removeClass(x, "cdm_table"))`: Failed to collect lazy table. - Caused by error: - ! rapi_prepare: Failed to prepare query SELECT cohort1.* - FROM main.cohort1 - WHERE NOT EXISTS ( - SELECT 1 FROM ( - SELECT cohort1.*, observation_period_start_date, observation_period_end_date - FROM main.cohort1 - LEFT JOIN main.observation_period - ON (cohort1.subject_id = observation_period.person_id) - ) RHS - WHERE - (cohort1.cohort_definition_id = RHS.cohort_definition_id) AND - (cohort1.subject_id = RHS.subject_id) AND - (cohort1.cohort_start_date = RHS.cohort_start_date) AND - (cohort1.cohort_end_date = RHS.cohort_end_date) AND - (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - ) - Error: Binder Error: Cannot compare values of type VARCHAR and type DATE - an explicit cast is required - LINE 15: (RHS.cohort_start_date >= RHS.observation_period_start_date AND RHS.cohort_start_date <= RHS.observation_period_end_date AND RHS.cohort_end_date >= RHS.observation_period_start_date AND RHS.cohort_end_date <= RHS.observation_period_end_date) - )... - ^ - Backtrace: - ▆ - 1. └─PatientProfiles::mockPatientProfiles(...) at test-summariseCharacteristics.R:51:3 - 2. └─omopgenerics::newCohortTable(cdm[[cohort]]) - 3. └─omopgenerics:::validateGeneratedCohortSet(cohort, soft = .softValidation) - 4. └─omopgenerics::checkCohortRequirements(...) - 5. └─omopgenerics:::checkObservationPeriod(...) - 6. ├─dplyr::collect(...) - 7. └─omopgenerics:::collect.cohort_table(...) - 8. ├─dplyr::collect(x) - 9. └─omopgenerics:::collect.cdm_table(x) - 10. ├─dplyr::collect(removeClass(x, "cdm_table")) - 11. └─dbplyr:::collect.tbl_sql(removeClass(x, "cdm_table")) - 12. └─base::tryCatch(...) - 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers) - 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]]) - 15. └─value[[3L]](cond) - 16. └─cli::cli_abort("Failed to collect lazy table.", parent = cnd) - 17. └─rlang::abort(...) + ── Failure ('test-connect.R:3:3'): connects and disconnects to duckdb in memory ── + con@driver@dbdir not equal to ":memory:". + 1/1 mismatches + x[1]: "" + y[1]: ":memory:" - [ FAIL 8 | WARN 38 | SKIP 1 | PASS 644 ] + [ FAIL 1 | WARN 1 | SKIP 0 | PASS 32 ] Error: Test failures + In addition: Warning messages: + 1: Database is garbage-collected, use dbConnect(duckdb()) with dbDisconnect(), or duckdb::duckdb_shutdown(drv) to avoid this. + 2: Database is garbage-collected, use dbConnect(duckdb()) with dbDisconnect(), or duckdb::duckdb_shutdown(drv) to avoid this. + 3: Database is garbage-collected, use dbConnect(duckdb()) with dbDisconnect(), or duckdb::duckdb_shutdown(drv) to avoid this. Execution halted ``` +## In both + +* checking data for non-ASCII characters ... NOTE + ``` + Note: found 14 marked UTF-8 strings + ``` + From 004f91ddc75de43337cf100a5385c202c753f5fc Mon Sep 17 00:00:00 2001 From: tau31 Date: Thu, 21 Dec 2023 22:30:08 +0100 Subject: [PATCH 18/21] check_bigint() and tests --- R/Driver.R | 28 +++++++++++++++------------- R/dbConnect__duckdb_driver.R | 7 +++++-- man/duckdb.Rd | 4 +++- tests/testthat/test-integer64.R | 15 +++++++++++++-- 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/R/Driver.R b/R/Driver.R index de6125bc6..b2eed12d1 100644 --- a/R/Driver.R +++ b/R/Driver.R @@ -29,18 +29,7 @@ driver_registry <- new.env(parent = emptyenv()) #' @export duckdb <- function(dbdir = DBDIR_MEMORY, read_only = FALSE, bigint = "numeric", config = list()) { check_flag(read_only) - - switch(bigint, - numeric = { - # fine - }, - integer64 = { - if (!is_installed("bit64")) { - stop("bit64 package is required for integer64 support") - } - }, - stop(paste("Unsupported bigint configuration", bigint)) - ) + check_bigint(bigint) # R packages are not allowed to write extensions into home directory, so use R_user_dir instead if (!("extension_directory" %in% names(config))) { @@ -165,7 +154,6 @@ check_tz <- function(timezone) { timezone } - path_normalize <- function(path) { if (path == "" || path == DBDIR_MEMORY) { return(DBDIR_MEMORY) @@ -181,3 +169,17 @@ path_normalize <- function(path) { } out } + +check_bigint <- function(bigint_type) { + switch(bigint_type, + numeric = { + # fine + }, + integer64 = { + if (!is_installed("bit64")) { + stop("bit64 package is required for integer64 support") + } + }, + stop(paste("Unsupported bigint configuration", bigint_type)) + ) +} diff --git a/R/dbConnect__duckdb_driver.R b/R/dbConnect__duckdb_driver.R index 581859c53..50a30c61a 100644 --- a/R/dbConnect__duckdb_driver.R +++ b/R/dbConnect__duckdb_driver.R @@ -19,7 +19,9 @@ #' If `"force"` is chosen, the timestamp will have the same clock #' time as the timestamp in the database, but with the new time zone. #' @param config Named list with DuckDB configuration flags -#' @param bigint How 64-bit integers should be returned, default is double/numeric. Set to integer64 for bit64 encoding. +#' @param bigint How 64-bit integers should be returned. There are two options: `"numeric"` and `"integer64"`. +#' If `"numeric"` is selected, bigint integers will be treated as double/numeric. +#' If `"integer64"` is selected, bigint integers will be set to bit64 encoding. #' #' @return `dbConnect()` returns an object of class #' \linkS4class{duckdb_connection}. @@ -65,6 +67,8 @@ dbConnect__duckdb_driver <- function( if (missing(bigint)) { bigint <- drv@bigint + } else { + check_bigint(bigint) } config <- utils::modifyList(drv@config, config) @@ -79,7 +83,6 @@ dbConnect__duckdb_driver <- function( conn@timezone_out <- timezone_out conn@tz_out_convert <- tz_out_convert - on.exit(NULL) rs_on_connection_opened(conn) diff --git a/man/duckdb.Rd b/man/duckdb.Rd index ef358114c..fd2b57cd0 100644 --- a/man/duckdb.Rd +++ b/man/duckdb.Rd @@ -43,7 +43,9 @@ data is kept in RAM.} \item{read_only}{Set to \code{TRUE} for read-only operation} -\item{bigint}{How 64-bit integers should be returned, default is double/numeric. Set to integer64 for bit64 encoding.} +\item{bigint}{How 64-bit integers should be returned. There are two options: \code{"numeric"} and \code{"integer64"}. +If \code{"numeric"} is selected, bigint integers will be treated as double/numeric. +If \code{"integer64"} is selected, bigint integers will be set to bit64 encoding.} \item{config}{Named list with DuckDB configuration flags} diff --git a/tests/testthat/test-integer64.R b/tests/testthat/test-integer64.R index 44733f932..fc868533d 100644 --- a/tests/testthat/test-integer64.R +++ b/tests/testthat/test-integer64.R @@ -1,7 +1,6 @@ # this tests both retrieval and scans -test_that("we can roundtrip an integer64", { +test_that("we can roundtrip an integer64 via driver", { skip_if_not_installed("bit64") - con <- dbConnect(duckdb(bigint = "integer64")) on.exit(dbDisconnect(con, shutdown = TRUE)) df <- data.frame(a = bit64::as.integer64(42), b = bit64::as.integer64(-42), c = bit64::as.integer64(NA)) @@ -11,3 +10,15 @@ test_that("we can roundtrip an integer64", { res <- dbReadTable(con, "df") expect_identical(df, res) }) + +test_that("we can roundtrip an integer64 via dbConnect", { + skip_if_not_installed("bit64") + con <- dbConnect(duckdb(), bigint = "integer64") + on.exit(dbDisconnect(con, shutdown = TRUE)) + df <- data.frame(a = bit64::as.integer64(42), b = bit64::as.integer64(-42), c = bit64::as.integer64(NA)) + + duckdb_register(con, "df", df) + + res <- dbReadTable(con, "df") + expect_identical(df, res) +}) From dc9b006d90e6ea7290868783a56dc5060999e896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 11:34:04 +0100 Subject: [PATCH 19/21] Document behavior --- R/Driver.R | 20 +++++++++++--------- R/dbConnect__duckdb_driver.R | 10 ++++++++-- man/duckdb.Rd | 10 ++++++++-- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/R/Driver.R b/R/Driver.R index b2eed12d1..99698bd2c 100644 --- a/R/Driver.R +++ b/R/Driver.R @@ -31,25 +31,27 @@ duckdb <- function(dbdir = DBDIR_MEMORY, read_only = FALSE, bigint = "numeric", check_flag(read_only) check_bigint(bigint) - # R packages are not allowed to write extensions into home directory, so use R_user_dir instead - if (!("extension_directory" %in% names(config))) { - config["extension_directory"] <- file.path(tools::R_user_dir("duckdb", "data"), "extensions") - } - if (!("secret_directory" %in% names(config))) { - config["secret_directory"] <- file.path(tools::R_user_dir("duckdb", "data"), "stored_secrets") - } - dbdir <- path_normalize(dbdir) if (dbdir != DBDIR_MEMORY) { drv <- driver_registry[[dbdir]] # We reuse an existing driver object if the database is still alive. # If not, we fall back to creating a new driver object with a new database. if (!is.null(drv) && rapi_lock(drv@database_ref)) { - # FIXME: Check that readonly and config are identical + # We don't care about different read_only or config settings here. + # The bigint setting can be actually picked up by dbConnect(), we update it here. + drv@bigint <- bigint return(drv) } } + # R packages are not allowed to write extensions into home directory, so use R_user_dir instead + if (!("extension_directory" %in% names(config))) { + config["extension_directory"] <- file.path(tools::R_user_dir("duckdb", "data"), "extensions") + } + if (!("secret_directory" %in% names(config))) { + config["secret_directory"] <- file.path(tools::R_user_dir("duckdb", "data"), "stored_secrets") + } + # Always create new database for in-memory, # allows isolation and mixing different configs drv <- new( diff --git a/R/dbConnect__duckdb_driver.R b/R/dbConnect__duckdb_driver.R index 50a30c61a..2307cc160 100644 --- a/R/dbConnect__duckdb_driver.R +++ b/R/dbConnect__duckdb_driver.R @@ -8,7 +8,10 @@ #' data is kept in RAM. #' @param ... Ignored #' @param debug Print additional debug information such as queries -#' @param read_only Set to `TRUE` for read-only operation +#' @param read_only Set to `TRUE` for read-only operation. +#' For file-based databases, this is only applied when the database file is opened for the first time. +#' Subsequent connections (via the same `drv` object or a `drv` object pointing to the same path) +#' will silently ignore this flag. #' @param timezone_out The time zone returned to R, defaults to `"UTC"`, which #' is currently the only timezone supported by duckdb. #' If you want to display datetime values in the local timezone, @@ -18,7 +21,10 @@ #' is chosen, the timestamp will be returned as it would appear in the specified time zone. #' If `"force"` is chosen, the timestamp will have the same clock #' time as the timestamp in the database, but with the new time zone. -#' @param config Named list with DuckDB configuration flags +#' @param config Named list with DuckDB configuration flags, see +#' for the possible options. +#' These flags are only applied when the database object is instantiated. +#' Subsequent connections will silently ignore these flags. #' @param bigint How 64-bit integers should be returned. There are two options: `"numeric"` and `"integer64"`. #' If `"numeric"` is selected, bigint integers will be treated as double/numeric. #' If `"integer64"` is selected, bigint integers will be set to bit64 encoding. diff --git a/man/duckdb.Rd b/man/duckdb.Rd index fd2b57cd0..b1eea5de7 100644 --- a/man/duckdb.Rd +++ b/man/duckdb.Rd @@ -41,13 +41,19 @@ duckdb_adbc() directory in the file system. With the default (or \code{""}), all data is kept in RAM.} -\item{read_only}{Set to \code{TRUE} for read-only operation} +\item{read_only}{Set to \code{TRUE} for read-only operation. +For file-based databases, this is only applied when the database file is opened for the first time. +Subsequent connections (via the same \code{drv} object or a \code{drv} object pointing to the same path) +will silently ignore this flag.} \item{bigint}{How 64-bit integers should be returned. There are two options: \code{"numeric"} and \code{"integer64"}. If \code{"numeric"} is selected, bigint integers will be treated as double/numeric. If \code{"integer64"} is selected, bigint integers will be set to bit64 encoding.} -\item{config}{Named list with DuckDB configuration flags} +\item{config}{Named list with DuckDB configuration flags, see +\url{https://duckdb.org/docs/configuration/overview#configuration-reference} for the possible options. +These flags are only applied when the database object is instantiated. +Subsequent connections will silently ignore these flags.} \item{drv}{Object returned by \code{duckdb()}} From 81bdeb6a4f0f4654afc0b202cf60610d4374afe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 15:42:16 +0100 Subject: [PATCH 20/21] Add tests --- tests/testthat/test-connect.R | 166 ++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 tests/testthat/test-connect.R diff --git a/tests/testthat/test-connect.R b/tests/testthat/test-connect.R new file mode 100644 index 000000000..d8981f832 --- /dev/null +++ b/tests/testthat/test-connect.R @@ -0,0 +1,166 @@ +# Run the first two tests manually + +test_that("warning on garbage-collecting unused driver", { + # Needs to run manually because these warnings can't be caught + skip_if_not(interactive()) + + duckdb() + expect_null(NULL) + # Expecting warning: "Database is garbage-collected" + gc() +}) + +test_that("warning on garbage-collecting open connection", { + # Needs to run manually because these warnings can't be caught + skip_if_not(interactive()) + + dbConnect(duckdb()) + expect_null(NULL) + # Expecting warning: "Connection is garbage-collected", but not "Database ..." + gc() +}) + +test_that("no warning after duckdb_shutdown() for unused driver", { + duckdb_shutdown(duckdb()) + + # The warning won't occur here anyway, this is to keep testthat happy + expect_warning(gc(), NA) +}) + +test_that("no warning after dbDisconnect() for used driver", { + con <- dbConnect(duckdb()) + dbDisconnect(con) + + # The warning won't occur here anyway, this is to keep testthat happy + expect_warning(gc(), NA) +}) + +test_that("no warning after dbDisconnect() for driver stored in variable", { + drv <- duckdb() + con <- dbConnect(drv) + dbDisconnect(con) + + # The warning won't occur here anyway, this is to keep testthat happy + expect_warning(gc(), NA) + + rm(drv) + expect_warning(gc(), NA) +}) + +test_that("can connect to the same in-memory database via the same driver object", { + drv <- duckdb() + + con1 <- dbConnect(drv) + on.exit(dbDisconnect(con1), add = TRUE) + dbWriteTable(con1, "x", data.frame(a = 1:3)) + + con2 <- dbConnect(drv) + on.exit(dbDisconnect(con2), add = TRUE) + + expect_equal(dbReadTable(con2, "x"), data.frame(a = 1:3)) + + dbWriteTable(con2, "y", data.frame(b = 1:3)) + expect_equal(dbReadTable(con1, "y"), data.frame(b = 1:3)) + + gc() +}) + +test_that("will connect to different in-memory databases via different driver objects", { + drv1 <- duckdb() + con1 <- dbConnect(drv1) + on.exit(dbDisconnect(con1), add = TRUE) + dbWriteTable(con1, "x", data.frame(a = 1:3)) + + drv2 <- duckdb() + con2 <- dbConnect(drv2) + on.exit(dbDisconnect(con2), add = TRUE) + + expect_false(dbExistsTable(con2, "x")) + + dbWriteTable(con2, "y", data.frame(b = 1:3)) + expect_false(dbExistsTable(con1, "y")) + + gc() +}) + +test_that("can connect to the same database file via the same driver object", { + drv <- duckdb(tempfile(fileext = ".duckdb")) + + con1 <- dbConnect(drv) + on.exit(dbDisconnect(con1), add = TRUE) + dbWriteTable(con1, "x", data.frame(a = 1:3)) + + con2 <- dbConnect(drv) + on.exit(dbDisconnect(con2), add = TRUE) + + expect_equal(dbReadTable(con2, "x"), data.frame(a = 1:3)) + + dbWriteTable(con2, "y", data.frame(b = 1:3)) + expect_equal(dbReadTable(con1, "y"), data.frame(b = 1:3)) + + gc() +}) + +test_that("can connect to the same database file via different driver objects", { + path <- tempfile(fileext = ".duckdb") + writeLines(character(), path) + path <- normalizePath(path) + unlink(path) + + drv1 <- duckdb(path) + con1 <- dbConnect(drv1) + on.exit(dbDisconnect(con1), add = TRUE) + dbWriteTable(con1, "x", data.frame(a = 1:3)) + + drv2 <- duckdb(path) + con2 <- dbConnect(drv2) + on.exit(dbDisconnect(con2), add = TRUE) + + expect_equal(dbReadTable(con2, "x"), data.frame(a = 1:3)) + + dbWriteTable(con2, "y", data.frame(b = 1:3)) + expect_equal(dbReadTable(con1, "y"), data.frame(b = 1:3)) + + gc() +}) + +test_that("read_only only applies to the first driver object for a path", { + path <- tempfile(fileext = ".duckdb") + writeLines(character(), path) + path <- normalizePath(path) + unlink(path) + + drv1 <- duckdb(path) + con1 <- dbConnect(drv1) + on.exit(dbDisconnect(con1), add = TRUE) + dbWriteTable(con1, "y", data.frame(b = 1:3)) + + drv2 <- duckdb(path, read_only = TRUE) + con2 <- dbConnect(drv2) + on.exit(dbDisconnect(con2), add = TRUE) + + dbWriteTable(con2, "x", data.frame(a = 1:3)) + expect_equal(dbReadTable(con1, "x"), data.frame(a = 1:3)) + + gc() +}) + +test_that("config only applies to the first driver object for a path", { + path <- tempfile(fileext = ".duckdb") + writeLines(character(), path) + path <- normalizePath(path) + unlink(path) + + drv1 <- duckdb(path) + con1 <- dbConnect(drv1) + on.exit(dbDisconnect(con1), add = TRUE) + dbWriteTable(con1, "x", data.frame(a = 1:3)) + + drv2 <- duckdb(path, config = list(default_order = "DESC")) + con2 <- dbConnect(drv2) + on.exit(dbDisconnect(con2), add = TRUE) + expect_equal(dbGetQuery(con2, "SELECT * FROM x ORDER BY a"), data.frame(a = 1:3)) + + gc() +}) + From 2cab8037ae37c07d2815bf9d5c958b34f37d9695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 24 Mar 2024 15:48:40 +0100 Subject: [PATCH 21/21] Avoid warning when using other dbdir --- R/cpp11.R | 4 ++++ R/dbConnect__duckdb_driver.R | 1 + src/cpp11.cpp | 9 +++++++++ src/database.cpp | 7 +++++++ tests/testthat/test-connect.R | 8 ++++++++ 5 files changed, 29 insertions(+) diff --git a/R/cpp11.R b/R/cpp11.R index dcefa2f78..db272bb90 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -16,6 +16,10 @@ rapi_lock <- function(dual) { .Call(`_duckdb_rapi_lock`, dual) } +rapi_unlock <- function(dual) { + invisible(.Call(`_duckdb_rapi_unlock`, dual)) +} + rapi_is_locked <- function(dual) { .Call(`_duckdb_rapi_is_locked`, dual) } diff --git a/R/dbConnect__duckdb_driver.R b/R/dbConnect__duckdb_driver.R index 2307cc160..9b40aebe2 100644 --- a/R/dbConnect__duckdb_driver.R +++ b/R/dbConnect__duckdb_driver.R @@ -81,6 +81,7 @@ dbConnect__duckdb_driver <- function( # aha, a late comer. let's make a new instance. if (dbdir != drv@dbdir || !rapi_lock(drv@database_ref)) { + rapi_unlock(drv@database_ref) drv <- duckdb(dbdir, read_only, bigint, config) } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 7c9c811f6..cb3c53d39 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -35,6 +35,14 @@ extern "C" SEXP _duckdb_rapi_lock(SEXP dual) { END_CPP11 } // database.cpp +void rapi_unlock(duckdb::db_eptr_t dual); +extern "C" SEXP _duckdb_rapi_unlock(SEXP dual) { + BEGIN_CPP11 + rapi_unlock(cpp11::as_cpp>(dual)); + return R_NilValue; + END_CPP11 +} +// database.cpp bool rapi_is_locked(duckdb::db_eptr_t dual); extern "C" SEXP _duckdb_rapi_is_locked(SEXP dual) { BEGIN_CPP11 @@ -455,6 +463,7 @@ static const R_CallMethodDef CallEntries[] = { {"_duckdb_rapi_release", (DL_FUNC) &_duckdb_rapi_release, 1}, {"_duckdb_rapi_shutdown", (DL_FUNC) &_duckdb_rapi_shutdown, 1}, {"_duckdb_rapi_startup", (DL_FUNC) &_duckdb_rapi_startup, 3}, + {"_duckdb_rapi_unlock", (DL_FUNC) &_duckdb_rapi_unlock, 1}, {"_duckdb_rapi_unregister_arrow", (DL_FUNC) &_duckdb_rapi_unregister_arrow, 2}, {"_duckdb_rapi_unregister_df", (DL_FUNC) &_duckdb_rapi_unregister_df, 2}, {NULL, NULL, 0} diff --git a/src/database.cpp b/src/database.cpp index d6759535d..4b935a917 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -84,6 +84,13 @@ static bool CastRstringToVarchar(Vector &source, Vector &result, idx_t count, Ca return dual->has(); } +[[cpp11::register]] void rapi_unlock(duckdb::db_eptr_t dual) { + if (!dual || !dual.get()) { + cpp11::stop("rapi_unlock: Invalid database reference"); + } + dual->unlock(); +} + [[cpp11::register]] bool rapi_is_locked(duckdb::db_eptr_t dual) { if (!dual || !dual.get()) { cpp11::stop("rapi_is_locked: Invalid database reference"); diff --git a/tests/testthat/test-connect.R b/tests/testthat/test-connect.R index d8981f832..ba5b211c4 100644 --- a/tests/testthat/test-connect.R +++ b/tests/testthat/test-connect.R @@ -47,6 +47,14 @@ test_that("no warning after dbDisconnect() for driver stored in variable", { expect_warning(gc(), NA) }) +test_that("no warning on dbConnect() with other dbdir", { + con <- dbConnect(duckdb(), tempfile(fileext = ".duckdb")) + dbDisconnect(con) + + # The warning won't occur here anyway, this is to keep testthat happy + expect_warning(gc(), NA) +}) + test_that("can connect to the same in-memory database via the same driver object", { drv <- duckdb()