From c22f2bb35a37e75710349922707a812d98e61fe6 Mon Sep 17 00:00:00 2001 From: catalamarti Date: Mon, 9 Sep 2024 22:59:11 +0100 Subject: [PATCH 1/2] addObservationPeriodId + addObservationPeriodIdQuery --- NAMESPACE | 1 + R/addObservationPeriodId.R | 137 +++++++++++---- R/checks.R | 6 +- man/addObservationPeriodId.Rd | 15 +- man/addObservationPeriodIdQuery.Rd | 39 +++++ tests/testthat/test-addObservationPeriodId.R | 175 ++++++++++++------- 6 files changed, 270 insertions(+), 103 deletions(-) create mode 100644 man/addObservationPeriodIdQuery.Rd diff --git a/NAMESPACE b/NAMESPACE index 3c2845f8..bd8d438b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ export(addFutureObservationQuery) export(addInObservation) export(addInObservationQuery) export(addObservationPeriodId) +export(addObservationPeriodIdQuery) export(addPriorObservation) export(addPriorObservationQuery) export(addSex) diff --git a/R/addObservationPeriodId.R b/R/addObservationPeriodId.R index 4bacd67c..770a2f44 100644 --- a/R/addObservationPeriodId.R +++ b/R/addObservationPeriodId.R @@ -1,8 +1,26 @@ -#' Add the ID associated with current observation period +# Copyright 2024 DARWIN EU (C) +# +# This file is part of PatientProfiles +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' Add the ordinal number of the observation period associated that a given date +#' is in. #' #' @param x Table with individuals in the cdm. #' @param indexDate Variable in x that contains the date to compute the #' observation flag. +#' @param nameObservationPeriodId Name of the new colum. #' @param name Name of the new table, if NULL a temporary table is returned. #' #' @return Table with the current observation period id added. @@ -17,57 +35,102 @@ #' } #' addObservationPeriodId <- function(x, - indexDate = "cohort_start_date", - name = NULL) { - + indexDate = "cohort_start_date", + nameObservationPeriodId = "observation_period_id", + name = NULL) { x <- validateX(x) - name <- validateName(name) + name <- omopgenerics::validateNameArgument( + name = name, + cdm = omopgenerics::cdmReference(x), + validation = "warning", + null = TRUE + ) + x |> + .addObservationPeriodIdQuery( + indexDate = indexDate, + nameObservationPeriodId = nameObservationPeriodId + ) |> + computeTable(name = name) +} + +#' Add the ordinal number of the observation period associated that a given date +#' is in. Result is not computed, only query is added. +#' +#' @param x Table with individuals in the cdm. +#' @param indexDate Variable in x that contains the date to compute the +#' observation flag. +#' @param nameObservationPeriodId Name of the new colum. +#' +#' @return Table with the current observation period id added. +#' @export +#' +#' @examples +#' \donttest{ +#' cdm <- mockPatientProfiles() +#' cdm$cohort1 %>% +#' addObservationPeriodIdQuery() +#' mockDisconnect(cdm = cdm) +#' } +#' +addObservationPeriodIdQuery <- function(x, + indexDate = "cohort_start_date", + nameObservationPeriodId = "observation_period_id") { + x |> + validateX() |> + .addObservationPeriodIdQuery( + indexDate = indexDate, + nameObservationPeriodId = nameObservationPeriodId + ) +} + +.addObservationPeriodIdQuery <- function(x, + indexDate, + nameObservationPeriodId, + call = parent.frame()){ cdm <- omopgenerics::cdmReference(x) - xName <- omopgenerics::tableName(x) - if(!is.na(xName) && - omopgenerics::tableName(x) == "observation_period"){ - cli::cli_abort("addObservationPeriodId cannot be used on the observation period table") - } - indexDate <- validateIndexDate(indexDate, null = FALSE, x = x) + indexDate <- validateIndexDate(indexDate, null = FALSE, x = x, call = call) personVariable <- c("person_id", "subject_id") personVariable <- personVariable[personVariable %in% colnames(x)] + nameObservationPeriodId <- validateColumn(nameObservationPeriodId, call = call) # drop variable if it already exists - if("observation_period_id" %in% colnames(x)){ - cli::cli_warn(c("!" = "Existing observation_period_id column will be overwritten")) + if(nameObservationPeriodId %in% colnames(x)){ + cli::cli_warn(c("!" = "Existing {nameObservationPeriodId} column will be overwritten")) x <- x |> - dplyr::select(!dplyr::all_of("observation_period_id")) + dplyr::select(!dplyr::all_of(nameObservationPeriodId)) } # if empty table, return with variable name added if(x |> utils::head(1) |> dplyr::tally() |> dplyr::pull("n") == 0){ return(x |> - dplyr::mutate(observation_period_id = as.integer(NA))) + dplyr::mutate(!!nameObservationPeriodId := as.integer(NA))) } - currentObsId <- x |> - dplyr::select(dplyr::all_of(c(personVariable, - indexDate))) |> - dplyr::left_join( + cols <- omopgenerics::uniqueId(n = 2, exclude = colnames(x)) + + currentObsId <- x |> + dplyr::select(dplyr::all_of(c(personVariable, indexDate))) |> + dplyr::inner_join( cdm$observation_period |> - dplyr::select(dplyr::all_of(c("person_id", - "observation_period_id", - "observation_period_start_date", - "observation_period_end_date" - ))) |> - dplyr::rename(!!personVariable := "person_id"), - by = personVariable) |> + dplyr::select( + !!personVariable :="person_id", + !!cols[1] := "observation_period_start_date", + !!cols[2] := "observation_period_end_date" + ), + by = personVariable + ) |> + dplyr::group_by(.data[[personVariable]], .data[[indexDate]]) |> + dplyr::arrange(.data[[cols[1]]]) |> + dplyr::mutate(!!nameObservationPeriodId := dplyr::row_number()) |> + dplyr::ungroup() |> dplyr::filter( - .data[[indexDate]] <= .data[["observation_period_end_date"]] && - .data[[indexDate]] >= .data[["observation_period_start_date"]]) |> - dplyr::select(dplyr::all_of(c(personVariable, - indexDate, - "observation_period_id"))) - - x <- x |> - dplyr::left_join(currentObsId, - by = c(personVariable, indexDate)) |> - computeTable(name = name) + .data[[indexDate]] <= .data[[cols[2]]] && + .data[[indexDate]] >= .data[[cols[1]]] + ) |> + dplyr::select(dplyr::all_of(c( + personVariable, indexDate, nameObservationPeriodId + ))) - x + x |> + dplyr::left_join(currentObsId, by = c(personVariable, indexDate)) } diff --git a/R/checks.R b/R/checks.R index 163d529f..83c70138 100644 --- a/R/checks.R +++ b/R/checks.R @@ -473,7 +473,7 @@ warnOverwriteColumns <- function(x, nameStyle, values = list()) { } # checks demographics -validateX <- function(x, call) { +validateX <- function(x, call = parent.frame()) { omopgenerics::assertClass(x, class = "cdm_table", call = call) cols <- colnames(x) n <- sum(c("person_id", "subject_id") %in% cols) @@ -508,14 +508,14 @@ validateIndexDate <- function(indexDate, null, x, call) { } return(indexDate) } -validateColumn <- function(col, null, call) { +validateColumn <- function(col, null = FALSE, call = parent.frame()) { if (null) { return(NULL) } nm <- paste0(substitute(col)) - err <- "{nm} must be a snake_case character vector" + err <- "{nm} must be a snake_case character string" if (!is.character(col)) cli::cli_abort(message = err, call = call) if (length(col) != 1) cli::cli_abort(message = err, call = call) if (is.na(col)) cli::cli_abort(message = err, call = call) diff --git a/man/addObservationPeriodId.Rd b/man/addObservationPeriodId.Rd index fcb52577..411e270f 100644 --- a/man/addObservationPeriodId.Rd +++ b/man/addObservationPeriodId.Rd @@ -2,9 +2,15 @@ % Please edit documentation in R/addObservationPeriodId.R \name{addObservationPeriodId} \alias{addObservationPeriodId} -\title{Add the ID associated with current observation period} +\title{Add the ordinal number of the observation period associated that a given date +is in.} \usage{ -addObservationPeriodId(x, indexDate = "cohort_start_date", name = NULL) +addObservationPeriodId( + x, + indexDate = "cohort_start_date", + nameObservationPeriodId = "observation_period_id", + name = NULL +) } \arguments{ \item{x}{Table with individuals in the cdm.} @@ -12,13 +18,16 @@ addObservationPeriodId(x, indexDate = "cohort_start_date", name = NULL) \item{indexDate}{Variable in x that contains the date to compute the observation flag.} +\item{nameObservationPeriodId}{Name of the new colum.} + \item{name}{Name of the new table, if NULL a temporary table is returned.} } \value{ Table with the current observation period id added. } \description{ -Add the ID associated with current observation period +Add the ordinal number of the observation period associated that a given date +is in. } \examples{ \donttest{ diff --git a/man/addObservationPeriodIdQuery.Rd b/man/addObservationPeriodIdQuery.Rd new file mode 100644 index 00000000..151552bd --- /dev/null +++ b/man/addObservationPeriodIdQuery.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/addObservationPeriodId.R +\name{addObservationPeriodIdQuery} +\alias{addObservationPeriodIdQuery} +\title{Add the ordinal number of the observation period associated that a given date +is in. Result is not computed, only query is added.} +\usage{ +addObservationPeriodIdQuery( + x, + indexDate = "cohort_start_date", + nameObservationPeriodId = "observation_period_id" +) +} +\arguments{ +\item{x}{Table with individuals in the cdm.} + +\item{indexDate}{Variable in x that contains the date to compute the +observation flag.} + +\item{nameObservationPeriodId}{Name of the new colum.} + +\item{name}{Name of the new table, if NULL a temporary table is returned.} +} +\value{ +Table with the current observation period id added. +} +\description{ +Add the ordinal number of the observation period associated that a given date +is in. Result is not computed, only query is added. +} +\examples{ +\donttest{ +cdm <- mockPatientProfiles() +cdm$cohort1 \%>\% + addObservationPeriodIdQuery() +mockDisconnect(cdm = cdm) +} + +} diff --git a/tests/testthat/test-addObservationPeriodId.R b/tests/testthat/test-addObservationPeriodId.R index 912bba7d..b862ddd7 100644 --- a/tests/testthat/test-addObservationPeriodId.R +++ b/tests/testthat/test-addObservationPeriodId.R @@ -1,100 +1,155 @@ test_that("add observation period id", { skip_on_cran() - cdm <- mockPatientProfiles( - con = connection(), - writeSchema = writeSchema()) person <- dplyr::tibble( - person_id = c(1L,2L,3L), + person_id = c(1L, 2L, 3L), gender_concept_id = 1L, year_of_birth = 1990L, race_concept_id = 1L, ethnicity_concept_id = 1L ) - cdm <- omopgenerics::insertTable(cdm, "person", - person) observation_period <- dplyr::tibble( - person_id = c(1L,1L,2L,3L), - observation_period_start_date = as.Date(c("2010-01-01", - "2019-01-01", - "2019-01-01", - "2019-01-01")), - observation_period_end_date = as.Date(c("2016-01-01", - "2021-01-01", - "2022-01-01", - "2019-01-01")), - observation_period_id = c(1L,2L,3L, 4L), + person_id = c(1L, 1L, 2L, 3L), + observation_period_start_date = as.Date(c( + "2010-01-01", "2019-01-01", "2019-01-01", "2019-01-01" + )), + observation_period_end_date = as.Date(c( + "2016-01-01", "2021-01-01", "2022-01-01", "2019-01-01" + )), + observation_period_id = c(1L, 2L, 3L, 4L), period_type_concept_id = 0L ) - cdm <- omopgenerics::insertTable(cdm, "observation_period", - observation_period) + + cdm <- mockPatientProfiles( + con = connection(), + writeSchema = writeSchema(), + person = person, + observation_period = observation_period + ) + my_cohort <- dplyr::tibble( - cohort_definition_id = c(1L,2L,1L,1L), - subject_id =c(1L,1L,2L,3L), - cohort_start_date = as.Date(c("2020-01-01", - "2015-05-12", - "2020-01-01", - "2020-01-01")), - cohort_end_date = as.Date(c("2020-01-01", - "2015-05-12", - "2020-01-01", - "2020-01-01")) + cohort_definition_id = c(1L, 2L, 1L, 1L), + subject_id = c(1L, 1L, 2L, 3L), + cohort_start_date = as.Date(c( + "2020-01-01", "2015-05-12", "2020-01-01", "2020-01-01" + )), + cohort_end_date = as.Date(c( + "2020-01-01", "2015-05-12", "2020-01-01", "2020-01-01" + )) + ) + + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "my_cohort", table = my_cohort ) - cdm <- omopgenerics::insertTable(cdm, "my_cohort", - my_cohort) + # note we have a cohort entry outside of observation to test expected NA cdm$my_cohort <- omopgenerics::newCohortTable(cdm$my_cohort, - .softValidation = TRUE) + .softValidation = TRUE + ) cdm$my_cohort_obs <- cdm$my_cohort |> addObservationPeriodId() - expect_true("observation_period_id" %in% - colnames(cdm$my_cohort_obs)) + expect_true("observation_period_id" %in% colnames(cdm$my_cohort_obs)) expect_true(cdm$my_cohort_obs |> - dplyr::filter(subject_id == 1) |> - dplyr::filter(cohort_definition_id == 1L) |> - dplyr::pull("observation_period_id") == 2L ) + dplyr::filter(subject_id == 1) |> + dplyr::filter(cohort_definition_id == 1L) |> + dplyr::pull("observation_period_id") == 2L) expect_true(cdm$my_cohort_obs |> - dplyr::filter(subject_id == 1) |> - dplyr::filter(cohort_definition_id == 2L) |> - dplyr::pull("observation_period_id") == 1L ) + dplyr::filter(subject_id == 1) |> + dplyr::filter(cohort_definition_id == 2L) |> + dplyr::pull("observation_period_id") == 1L) expect_true(cdm$my_cohort_obs |> - dplyr::filter(subject_id == 2) |> - dplyr::pull("observation_period_id") == 3L ) + dplyr::filter(subject_id == 2) |> + dplyr::pull("observation_period_id") == 1L) expect_true(is.na(cdm$my_cohort_obs |> dplyr::filter(subject_id == 3) |> dplyr::pull("observation_period_id"))) # overwrite variable if it already exists - expect_warning(cdm$my_cohort_obs <- cdm$my_cohort_obs |> + expect_warning(cdm$my_cohort_obs <- cdm$my_cohort_obs |> addObservationPeriodId()) - # cannot add to the observation period table -expect_error(cdm$observation_period |> - addObservationPeriodId(indexDate = "observation_period_end_date")) + # observation_period_id is overwritten + expect_warning(x <- cdm$observation_period |> + addObservationPeriodId(indexDate = "observation_period_end_date")) + expect_identical( + x |> + dplyr::collect() |> + dplyr::arrange(.data$person_id, .data$observation_period_start_date) |> + dplyr::pull("observation_period_id"), + x |> + dplyr::group_by(.data$person_id) |> + dplyr::arrange(.data$observation_period_start_date) |> + dplyr::mutate(id = dplyr::row_number()) |> + dplyr::collect() |> + dplyr::arrange(.data$person_id, .data$observation_period_start_date) |> + dplyr::pull("id") + ) # must have either person or subject id -expect_error(cdm$drug_exposure |> - dplyr::select(!"person_id") |> - addObservationPeriodId(indexDate = "drug_exposure_start_date")) + expect_error(cdm$drug_exposure |> + dplyr::select(!"person_id") |> + addObservationPeriodId(indexDate = "drug_exposure_start_date")) -expect_error(cdm$drug_exposure |> - dplyr::mutate("subject_id" = 1L) |> - addObservationPeriodId(indexDate = "drug_exposure_start_date")) + expect_error(cdm$drug_exposure |> + dplyr::mutate("subject_id" = 1L) |> + addObservationPeriodId(indexDate = "drug_exposure_start_date")) # must have index date column -expect_error(cdm$drug_exposure |> - addObservationPeriodId(indexDate = "cohort_start_date")) - expect_no_error(cdm$drug_exposure |> - addObservationPeriodId(indexDate = "drug_exposure_start_date")) + expect_error(cdm$drug_exposure |> + addObservationPeriodId(indexDate = "cohort_start_date")) + expect_no_error(cdm$drug_exposure |> + addObservationPeriodId(indexDate = "drug_exposure_start_date")) + + # no error if empty + cdm$drug_exposure |> + dplyr::filter(person_id == 0) |> + addObservationPeriodId(indexDate = "drug_exposure_start_date") - # no error if empty - cdm$drug_exposure |> - dplyr::filter(person_id == 0) |> - addObservationPeriodId(indexDate = "drug_exposure_start_date") + # check name + expect_warning( + cdm$my_cohort_obs <- cdm$my_cohort_obs |> + dplyr::select(-"observation_period_id") |> + addObservationPeriodId(name = "my_cohort_obs") + ) + expect_no_error( + cdm$my_cohort_2 <- cdm$my_cohort_obs |> + dplyr::select(-"observation_period_id") |> + addObservationPeriodId(name = "my_cohort_2") + ) + expect_error( + cdm$my_cohort_2 <- cdm$my_cohort_obs |> + dplyr::select(-"observation_period_id") |> + addObservationPeriodId(name = "my_cohort_3") + ) - mockDisconnect(cdm = cdm) + # check nameObservationPeriodId + expect_no_error( + cdm$my_cohort_2 <- cdm$my_cohort_obs |> + addObservationPeriodId(nameObservationPeriodId = "custom_name") + ) + expect_true("custom_name" %in% colnames(cdm$my_cohort_2)) + expect_warning( + cdm$my_cohort_2 <- cdm$my_cohort_obs |> + addObservationPeriodId(nameObservationPeriodId = "camelCase") + ) + expect_true("camel_case" %in% colnames(cdm$my_cohort_2)) + expect_false("camelCase" %in% colnames(cdm$my_cohort_2)) + + expect_error( + cdm$my_cohort_obs |> + addObservationPeriodId(nameObservationPeriodId = NA_character_) + ) + expect_error( + cdm$my_cohort_obs |> + addObservationPeriodId(nameObservationPeriodId = NULL) + ) + expect_error( + cdm$my_cohort_obs |> + addObservationPeriodId(nameObservationPeriodId = c("id1", "id2")) + ) + mockDisconnect(cdm = cdm) }) From 7728fbc4fa5e699ff44ca6c6f6f3c89fbb32ff53 Mon Sep 17 00:00:00 2001 From: catalamarti Date: Mon, 9 Sep 2024 22:59:59 +0100 Subject: [PATCH 2/2] Update addObservationPeriodIdQuery.Rd --- man/addObservationPeriodIdQuery.Rd | 2 -- 1 file changed, 2 deletions(-) diff --git a/man/addObservationPeriodIdQuery.Rd b/man/addObservationPeriodIdQuery.Rd index 151552bd..e81c8702 100644 --- a/man/addObservationPeriodIdQuery.Rd +++ b/man/addObservationPeriodIdQuery.Rd @@ -18,8 +18,6 @@ addObservationPeriodIdQuery( observation flag.} \item{nameObservationPeriodId}{Name of the new colum.} - -\item{name}{Name of the new table, if NULL a temporary table is returned.} } \value{ Table with the current observation period id added.