From afb0665a0e4b69a3cb9e7c556b31fdb95734cb72 Mon Sep 17 00:00:00 2001 From: barbarazpc Date: Thu, 28 Sep 2023 23:08:29 +0200 Subject: [PATCH] closes #2, closes #5 Improvements to accessor functions & show method --- R/accessors.R | 70 +++++------- R/countMolecules.R | 13 ++- R/other_methods.R | 268 ++++++++++++++++++++++----------------------- R/readSegMask.R | 3 +- 4 files changed, 169 insertions(+), 185 deletions(-) diff --git a/R/accessors.R b/R/accessors.R index 8488f0c..e7a3610 100644 --- a/R/accessors.R +++ b/R/accessors.R @@ -38,17 +38,20 @@ #' keepCols = "essential", #' addBoundaries = "cell" #' ) +#' +#' # get insight into MoleculeExperiment object (e.g., see assay names) +#' me #' -#' # get insight into molecules slot +#' # get insight into molecules slot (e.g., see the assay names) #' showMolecules(me) #' #' # for developers, use molecules() getter #' # expect a large output from call below -#' # molecules(me) +#' # molecules(me, assayName = "detected") #' # alternatively, return rectangular data structure with flatten = TRUE #' molecules(me, assayName = "detected", flatten = TRUE) #' -#' # get insight into boundaries slot +#' # get insight into boundaries slot (e.g., see the assay names) #' showBoundaries(me) #' #' # for developers, use boundaries() getter @@ -58,10 +61,10 @@ #' boundaries(me, assayName = "cell", flatten = TRUE) #' #' # features() getter -#' features(me) +#' features(me, assayName = "detected") #' #' # segmentIDs() getter -#' segmentIDs(me, "cell") +#' segmentIDs(me, assayName = "cell") #' #' # setter example #' # read in and standardise nucleus boundaries too @@ -77,7 +80,7 @@ #' ) #' #' # use `boundaries<-` setter to add nucleus boundaries to the boundaries slot -#' boundaries(me, "nucleus") <- nucleiMEList +#' boundaries(me, assayName = "nucleus") <- nucleiMEList #' me #' @return A MoleculeExperiment object slot. NULL @@ -85,15 +88,15 @@ NULL #' @rdname accessors #' @export #' @importFrom methods is -#' @importFrom cli cli_inform setMethod("molecules", signature = signature(object = "MoleculeExperiment"), definition = function(object, - assayName = "detected", + assayName = NULL, flatten = FALSE) { # check arg validity + # retrieve molecules only when correct assay name has been provided + .stop_if_null(assayName) .check_if_character(assayName) - if (!assayName %in% names(object@molecules)) { stop("Assay name specified does not exist in molecules slot. Please specify another assay name in the assayName argument.") @@ -104,13 +107,6 @@ Please specify another assay name in the assayName argument.") big_df <- .flatten_molecules(object, assay_name = assayName) return(big_df) } else { - cli::cli_inform(c( - "{.emph {assayName}} assay transcripts were retrieved.", - "i" = paste0( - "Other transcript assays can be retrieved by", - " specifying the {.var assayName} argument." - ) - )) return(object@molecules[assayName]) } } @@ -123,32 +119,18 @@ setMethod("boundaries", signature = signature(object = "MoleculeExperiment"), definition = function(object, assayName = NULL, flatten = FALSE) { # check arg validity + .stop_if_null(assayName) .check_if_character(assayName) # get boundaries slot information - if (is.null(assayName)) { - warning( - "All boundaries assays were returned: ", - names(object@boundaries), ". To select only a specific boundary -subslot, specify the assayName argument." - ) - return(object@boundaries) - } else { - if (!assayName %in% names(object@boundaries)) { - stop("Assay name specified does not exist in boundaries slot. + if (!assayName %in% names(object@boundaries)) { + stop("Assay name specified does not exist in boundaries slot. Please specify another assay name in the assayName argument.") - } + } else { if (flatten) { big_df <- .flatten_boundaries(object, assay_name = assayName) return(big_df) } else { - cli::cli_inform(c( - "{.emph {assayName}} assay boundaries were retrieved.", - "i" = paste0( - "Other boundary assays can be retrieved by", - " specifying the {.var assayName} argument." - ) - )) return(object@boundaries[assayName]) } } @@ -159,9 +141,14 @@ Please specify another assay name in the assayName argument.") #' @export setMethod("features", signature = signature(object = "MoleculeExperiment"), - definition = function(object, assayName = "detected") { + definition = function(object, assayName = NULL) { # check arg validity + .stop_if_null(assayName) .check_if_character(assayName) + if (!assayName %in% names(object@molecules)) { + stop("Assay name specified does not exist in molecules slot. +Please specify another assay name in the assayName argument.") + } # get the features from the molecules slot samples <- names(object@molecules[[assayName]]) @@ -169,16 +156,7 @@ setMethod("features", names(object@molecules[[assayName]][[s]]) }) names(f_list) <- samples - # TODO: use a verbosity setting to fix this!!! return(f_list) - cli::cli_inform(c( - " {.emph {assayName}} assay features were retrieved.", - "i" = paste0( - "To select features from a different assay, specify it ", - "assay in the {.var assayName} argument." - ) - )) - } ) @@ -193,6 +171,10 @@ setMethod("segmentIDs", retrieve the unique IDs. For example, the \"cells\" assay for cell boundaries.") } .check_if_character(assayName) + if (!assayName %in% names(object@boundaries)) { + stop("Assay name specified does not exist in boundaries slot. +Please specify another assay name in the assayName argument.") + } # get the segment IDs from the boundaries slot samples <- names(object@boundaries[[assayName]]) diff --git a/R/countMolecules.R b/R/countMolecules.R index 942a013..d01d2f2 100644 --- a/R/countMolecules.R +++ b/R/countMolecules.R @@ -51,10 +51,18 @@ countMolecules <- function(me, buffer = 0, matrixOnly = FALSE, nCores = 1) { - # check arg validity + # check arg validity .check_if_me(me) .stop_if_null(boundariesAssay, moleculesAssay) .check_if_character(boundariesAssay, moleculesAssay) + if (!moleculesAssay %in% names(me@molecules)) { + stop("Assay name specified does not exist in molecules slot. +Please specify another assay name in the assayName argument.") + } + if (!boundariesAssay %in% names(me@boundaries)) { + stop("Assay name specified does not exist in boundaries slot. +Please specify another assay name in the assayName argument.") + } init_mols <- MoleculeExperiment::molecules(me, moleculesAssay) init_bds <- MoleculeExperiment::boundaries(me, boundariesAssay) @@ -66,7 +74,8 @@ countMolecules <- function(me, @boundaries slot.") } samples <- names(me@molecules[[moleculesAssay]]) - features <- sort(unique(unlist(MoleculeExperiment::features(me)))) + features <- sort(unique(unlist( + MoleculeExperiment::features(me, moleculesAssay)))) bds_all <- init_bds[[boundariesAssay]] diff --git a/R/other_methods.R b/R/other_methods.R index 5540064..d1a261b 100644 --- a/R/other_methods.R +++ b/R/other_methods.R @@ -5,13 +5,11 @@ #' For example, showMolecules and showBoundaries summarise the large nested ME #' list of lists in the molecules and boundaries slots. #' nFeatures and nTranscripts get the numbers of features or transcripts, -#' respectively. They can do so across all samples, or per sample. +#' respectively. #' #' @param object Name of MoleculeExperiment object of interest. #' @param assayName Character string specifying the name of the assay from #' which to view a summary of the contents. -#' @param perSample Logical value specifying whether or not to summarize the -#' information per sample. #' #' @aliases #' showMolecules @@ -36,84 +34,91 @@ #' showBoundaries(me) #' #' nFeatures(me) -#' nFeatures(me, perSample = TRUE) #' #' nTranscripts(me) -#' nTranscripts(me, perSample = TRUE) #' @return A MoleculeExperiment object summary. NULL -setMethod("show", - signature = signature(object = "MoleculeExperiment"), - definition = function(object) { - # TODO: Make these methods assay name agnostic - cat("class: ", class(object), "\n") - cat( - paste( - length(object@molecules[["detected"]]), - "samples:", - paste(utils::head(names(object@molecules[["detected"]])), - collapse = " " - ) - ), - "\n" - ) - - cat("\n@molecules contents: ", "-detected assay:", sep = "\n") - nFeatures(object, "detected") - nTranscripts(object, "detected") - - # show range of coordinates - samples <- names(object@molecules[["detected"]]) - sample_x <- lapply(samples, function(x) { - f <- features(object)[[x]] - # get x coordinates for each gene - gene_x <- lapply(f, function(f) { - object@molecules[["detected"]][[x]][[f]][["x_location"]] - }) - }) - - sample_y <- lapply(samples, function(x) { - f <- features(object)[[x]] - # get y coordinates for each gene - gene_y <- lapply(f, function(f) { - object@molecules[["detected"]][[x]][[f]][["y_location"]] - }) - }) - - x_v <- unlist(sample_x) - y_v <- unlist(sample_y) - cat(paste0( - "Location range across all samples in assay \"detected\": [", - round(min(x_v), 2), ",", round(max(x_v), 2), "] x [", - round(min(y_v), 2), ",", round(max(y_v), 2), "]", "\n" - )) - - if (length(names(object@molecules)) > 1) { - all <- utils::head(names(object@molecules)) - cat(paste0( - "-other assays: ", - paste(all[all != "detected"], sep = ",", collapse = " "), "\n" +#' @importFrom S4Vectors coolcat +.me_show <- function(object) { + cat("MoleculeExperiment class\n\n") + + mols_assay_names <- names(object@molecules) + boundaries_assay_names <- names(object@boundaries) + + # show molecules contents + S4Vectors::coolcat("molecules slot (%d): %s\n", mols_assay_names) + + # show molecules slot contents per asssay and per sample + for (assay in mols_assay_names) { + + cat(paste0("- ", assay, ":", "\n")) + sample_names <- names(object@molecules[[assay]]) + S4Vectors::coolcat("samples (%d): %s\n", sample_names) + + for (sample in sample_names) { + cat(paste0("-- ", sample, ":\n")) + # show feature names + S4Vectors::coolcat("---- features (%d): %s\n", + MoleculeExperiment::features(object, + assayName = assay)[[sample]]) + + # show number of molecules + features <- object@molecules[[assay]][[sample]] + numbers_ls <- lapply(names(features), + function(x) {nrow(features[[x]])}) + + total <- sum(unlist(numbers_ls)) + cat(paste0("---- molecules (", total, ")\n")) + + # show location range of detected molecules + feature_names <- MoleculeExperiment::features( + object, assayName = assay)[[sample]] + gene_x <- lapply(feature_names, function(f) { + object@molecules[[assay]][[sample]][[f]][["x_location"]] + }) + gene_y <- lapply(feature_names, function(f) { + object@molecules[[assay]][[sample]][[f]][["y_location"]] + }) + x_v <- unlist(gene_x) + y_v <- unlist(gene_y) + cat(paste0("---- location range: [", + round(min(x_v), 2), ",", round(max(x_v), 2), "] x [", + round(min(y_v), 2), ",", round(max(y_v), 2), "]", "\n" )) } + } - if (is.null(object@boundaries)) { - cat("\n@boundaries contents: NULL\n") - } else { - cat("\n@boundaries contents:\n") - for (i in names(object@boundaries)) { - cat(paste0("-", i, ":\n")) - id_ls <- segmentIDs(object, assayName = i) - n_comp <- mean(lengths(id_ls)) - cat(paste0( - n_comp, " unique segment IDs: ", - paste(utils::head(id_ls[[1]]), collapse = " "), " ...\n" - )) + # show boundary contents + if (is.null(object@boundaries)) { + cat("\n\nboundaries slot: NULL\n") + } else { + S4Vectors::coolcat("\n\nboundaries slot (%d): %s\n", + boundaries_assay_names) + + # show the contents per assay and per sample + for (assay in boundaries_assay_names) { + cat(paste0("- ", assay, ":", "\n")) + sample_names <- names(object@boundaries[[assay]]) + S4Vectors::coolcat("samples (%d): %s\n", sample_names) + + for (sample in sample_names) { + cat(paste0("-- ", sample, ":\n")) + S4Vectors::coolcat("---- segments (%d): %s\n", + MoleculeExperiment::segmentIDs( + object, + assayName = assay)[[sample]] + ) } } } -) +} + +#' @rdname summarization +setMethod("show", + signature = signature(object = "MoleculeExperiment"), + .me_show) #' @rdname summarization #' @export @@ -128,23 +133,41 @@ setMethod("showMolecules", #' @rdname summarization #' @export #' @importFrom utils str -setMethod("extent", +setMethod("showBoundaries", signature = signature(object = "MoleculeExperiment"), definition = function(object) { - samples <- names(object@molecules[["detected"]]) + str(object@boundaries, max.level = 3, list.len = 2) + } +) + +#' @rdname summarization +#' @export +#' @importFrom utils str +setMethod("extent", + signature = signature(object = "MoleculeExperiment"), + definition = function(object, assayName = NULL) { + .stop_if_null(assayName) + .check_if_character(assayName) + + if (!assayName %in% names(object@molecules)) { + stop("Assay name specified does not exist in molecules slot. +Please specify another assay name in the assayName argument.") + } + + samples <- names(object@molecules[[assayName]]) sample_x <- lapply(samples, function(x) { - f <- features(object)[[x]] + f <- MoleculeExperiment::features(object, assayName = assayName)[[x]] # get x coordinates for each gene gene_x <- lapply(f, function(f) { - object@molecules[["detected"]][[x]][[f]][["x_location"]] + object@molecules[[assayName]][[x]][[f]][["x_location"]] }) }) sample_y <- lapply(samples, function(x) { - f <- features(object)[[x]] + f <- MoleculeExperiment::features(object, assayName = assayName)[[x]] # get y coordinates for each gene gene_y <- lapply(f, function(f) { - object@molecules[["detected"]][[x]][[f]][["y_location"]] + object@molecules[[assayName]][[x]][[f]][["y_location"]] }) }) @@ -157,49 +180,27 @@ setMethod("extent", } ) -#' @rdname summarization -#' @export -#' @importFrom utils str -setMethod("showBoundaries", - signature = signature(object = "MoleculeExperiment"), - definition = function(object) { - str(object@boundaries, max.level = 3, list.len = 2) - } -) #' @rdname summarization #' @export setMethod("nFeatures", signature = signature(object = "MoleculeExperiment"), - definition = function(object, assayName = "detected", perSample = FALSE) { - # check arg validity - .check_if_character(assayName) - - # calculate number of features in molecules slot - if (perSample) { - return(lengths(object@molecules[[assayName]])) - } else { - f_sample <- lapply( - names(object@molecules[[assayName]]), - function(t) { - names(object@molecules[[assayName]][[t]]) - } - ) - - number <- length(unique(unlist(f_sample))) - - features <- paste( - utils::head(features(object, assayName)[[1]]), - collapse = " " - ) - cli::cli_inform(paste0( - "{number} unique features across all samples in assay ", - "{.emph {assayName}}: ", - "{features}", - "..." - )) - - number + definition = function(object) { + # get assayNames from molecules slot + all_assayNames <- names(object@molecules) + for (assayName in all_assayNames) { + cat(paste("- assay", assayName,":", "\n")) + # print number of features in each sample and each assay + all_samples <- names(MoleculeExperiment::molecules( + object, assayName = assayName)[[assayName]]) + for (sample in all_samples) { + cat(paste(sample, + ":", + length(object@molecules[[assayName]][[sample]]), + "\n" + ) + ) + } } } ) @@ -209,32 +210,23 @@ setMethod("nFeatures", #' @export setMethod("nTranscripts", signature = signature(object = "MoleculeExperiment"), - definition = function(object, - assayName = "detected", - perSample = FALSE) { - # check arg validity - .check_if_character(assayName) - - # calculate total transcripts in molecules slot - samples <- names(object@molecules[[assayName]]) - sample_numbers <- vector("integer", length(samples)) - names(sample_numbers) <- samples - for (s in samples) { - features <- object@molecules[[assayName]][[s]] - numbers_ls <- lapply(names(features), function(x) { - nrow(features[[x]]) - }) - total <- sum(unlist(numbers_ls)) - sample_numbers[[s]] <- total - } + definition = function(object) { - if (perSample) { - return(sample_numbers) - } else { - cli::cli_inform(paste0( - "{mean(sample_numbers)} molecules on average across all", - " samples in assay {.emph {assayName}}" - )) + # get assayNames from molecules slot + all_assayNames <- names(object@molecules) + for (assayName in all_assayNames) { + cat(paste("- assay", assayName,":", "\n")) + # print number of transcripts in each sample and each assay + all_samples <- names(MoleculeExperiment::molecules( + object, assayName = assayName)[[assayName]]) + for (sample in all_samples) { + features <- object@molecules[[assayName]][[sample]] + numbers_ls <- lapply(names(features), + function(x) {nrow(features[[x]])}) + total <- sum(unlist(numbers_ls)) + cat(paste(sample, ":", total, "\n") + ) + } } } ) diff --git a/R/readSegMask.R b/R/readSegMask.R index 0973f76..b9fbbde 100644 --- a/R/readSegMask.R +++ b/R/readSegMask.R @@ -22,7 +22,8 @@ #' addBoundaries = NULL #' ) #' boundaries(me, "BIDcell_segmentation") <- readSegMask( -#' extent(me), # use the molecule extent to define the boundary extent +#' # use the molecule extent to define the boundary extent +#' extent(me, assayName = "detected"), #' path = segMask, assayName = "BIDcell_segmentation", #' sample_id = "sample1", background_value = 0 #' )