From ac2ff6e14d8a72d61b7cc1f7686e4c822db02875 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Wed, 2 Aug 2023 15:47:17 -0700 Subject: [PATCH 01/14] update docs --- docs/cellxgene_census_docsite_installation.md | 11 +- docs/cellxgene_census_docsite_quick_start.md | 138 ++++++++++++++---- 2 files changed, 119 insertions(+), 30 deletions(-) diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md index b10a57633..f5b448b0e 100644 --- a/docs/cellxgene_census_docsite_installation.md +++ b/docs/cellxgene_census_docsite_installation.md @@ -29,11 +29,14 @@ If installing in a Databricks notebook environment, use `%pip install`. Do not u ## R -The R package will be soon deposited into R-Universe. In the meantime you can directly install from github using the [devtools](https://devtools.r-lib.org/) R package. +From an R session, first install `tiledb` from R-Universe, the latest release in CRAN version is not yet available. -From an R session: +```r +install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev") +``` + +Then install `cellxgene.census` from R-Universe. ```r -install.packages("devtools") -devtools::install_github("chanzuckerberg/cellxgene-census/api/r/cellxgene.census") +install.packages("cellxgene.census", repos = "https://chanzuckerberg.r-universe.dev") ``` diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md index d140373e8..cf19a86fc 100644 --- a/docs/cellxgene_census_docsite_quick_start.md +++ b/docs/cellxgene_census_docsite_quick_start.md @@ -142,7 +142,6 @@ And you must close the query. ## R quick start -❗ **API is in beta and under rapid development.** Below are 3 examples of common operations you can do with the Census. 
As a reminder, the reference documentation for the API can be accessed via `?`: @@ -172,61 +171,148 @@ cell_metadata = cell_metadata$read( column_names = c("assay", "cell_type", "sex", "tissue", "tissue_general", "suspension_type", "disease") ) +# Concatenates results to an Arrow Table +cell_metadata = cell_metadata$concat() + # Convert to R tibble (dataframe) cell_metadata = as.data.frame(cell_metadata) print(cell_metadata) + +census$close() ``` -The output is a `tibble` with about 300K cells meeting our query criteria and the selected columns. +The output is a `tibble` with over 300K cells meeting our query criteria and the selected columns. ```bash -# A tibble: 305,735 × 7 - assay cell_type sex tissue tissue_general suspension_type disease - - 1 10x 3' v3 neuron female lung lung nucleus normal - 2 10x 3' v3 neuron female lung lung nucleus normal - 3 10x 3' v3 neuron female lung lung nucleus normal - 4 10x 3' v3 neuron female lung lung nucleus normal - 5 10x 3' v3 neuron female lung lung nucleus normal - 6 10x 3' v3 neuron female lung lung nucleus normal - 7 10x 3' v3 neuron female lung lung nucleus normal - 8 10x 3' v3 neuron female lung lung nucleus normal - 9 10x 3' v3 neuron female lung lung nucleus normal -10 10x 3' v3 neuron female lung lung nucleus normal -# ℹ 305,725 more rows +# A tibble: 379,224 × 7 + assay cell_type sex tissue tissue_general suspension_type disease + + 1 10x 3' v3 microglial cell fema… eye eye cell normal + 2 10x 3' v3 microglial cell fema… eye eye cell normal + 3 10x 3' v3 microglial cell fema… eye eye cell normal + 4 10x 3' v3 microglial cell fema… eye eye cell normal + 5 10x 3' v3 microglial cell fema… eye eye cell normal + 6 10x 3' v3 microglial cell fema… eye eye cell normal + 7 10x 3' v3 microglial cell fema… eye eye cell normal + 8 10x 3' v3 microglial cell fema… eye eye cell normal + 9 10x 3' v3 microglial cell fema… eye eye cell normal +10 10x 3' v3 microglial cell fema… eye eye cell normal +# ℹ 379,214 more rows # ℹ Use 
`print(n = ...)` to see more rows ``` -### Obtaining a slice as a Seurat object +### Obtaining a slice as a `Seurat` or `SingleCellExperiment` object -The following creates an Seurat object on-demand with the smaller set of cells and filtering only the genes `ENSG00000161798`, `ENSG00000188229`. +The following creates an Seurat object on-demand with a smaller set of cells and filtering only the genes `ENSG00000161798`, `ENSG00000188229`. -```python +```r library("cellxgene.census") +library("Seurat") census = open_soma() +organism = "Homo sapiens" +gene_filter = "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')" +cell_filter = "cell_type == 'sympathetic neuron'" +cell_columns = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease") + seurat_obj = get_seurat( census = census, - organism = "Homo sapiens", - var_value_filter = "feature_id %in% c('ENSG00000161798', 'ENSG00000188229')", - obs_value_filter = "sex == 'female' & cell_type %in% c('microglial cell', 'neuron')", - obs_column_names = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease") + organism = organism, + var_value_filter = gene_filter, + obs_value_filter = cell_filter, + obs_column_names = cell_columns ) print(seurat_obj) ``` -The output with about 5K cells and 2 genes can be now used for downstream analysis using [Seurat](https://satijalab.org/seurat/). +The output with over 4K cells and 2 genes can be now used for downstream analysis using [Seurat](https://satijalab.org/seurat/). ``` shell An object of class Seurat -2 features across 5876 samples within 1 assay +2 features across 4744 samples within 1 assay Active assay: RNA (2 features, 0 variable features) ``` +Similarly a `SingleCellExperiment` object can be created. 
+ +```r +library("SingleCellExperiment") + +sce_obj = get_single_cell_experiment( + census = census, + organism = organism, + var_value_filter = gene_filter, + obs_value_filter = cell_filter, + obs_column_names = cell_columns +) + +print(sce_obj) +``` + +The output with over 4K cells and 2 genes can be now used for downstream analysis using the [Bioconductor ecosystem](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html). + +``` shell +class: SingleCellExperiment +dim: 2 4744 +metadata(0): +assays(1): counts +rownames(2): ENSG00000106034 ENSG00000107317 +rowData names(2): feature_name feature_length +colnames(4744): obs48350835 obs48351829 ... obs52469564 obs52470190 +colData names(6): assay cell_type ... suspension_type disease +reducedDimNames(0): +mainExpName: RNA +altExpNames(0): +``` + + ### Memory-efficient queries -Memory-efficient capabilities of the R API are still under active development. +This example provides a demonstration to access the data for larger-than-memory operations using **TileDB-SOMA** operations. + +First we initiate a lazy-evaluation query to access all brain and male cells from human. This query needs to be closed — `query$close()`. + +```r +library("cellxgene.census") +library("tiledbsoma") + +human = census$get("census_data")$get("homo_sapiens") +query = human$axis_query( + measurement_name = "RNA", + obs_query = SOMAAxisQuery$new( + value_filter = "tissue == 'brain' & sex == 'male'" + ) +) + +# Continued below + + +``` + +Now we can iterate over the matrix count, as well as the cell and gene metadata. For example, to iterate over the matrix count, we can get an iterator and perform operations for each iteration. + +```r +# Continued from above + +iterator = query$X("raw")$tables() +# For sparse matrices use query$X("raw")$sparse_matrix() + +# Get an iterative slice as an Arrow Table +raw_slice = iterator$read_next() + +#... +``` + +And you can now perform operations on each iteration slice. 
This logic can be wrapped in a `while()` loop that checks the iteration state by monitoring the logical output of `iterator$read_complete()`. + +And you must close the query and census. + +``` +# Continued from above +query$close() +census$close() +``` From 220fb9db3adeb7f6f26c09791cd5287584a8da23 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Wed, 2 Aug 2023 15:50:33 -0700 Subject: [PATCH 02/14] Editorial changes --- docs/cellxgene_census_docsite_installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md index f5b448b0e..44a9de9cb 100644 --- a/docs/cellxgene_census_docsite_installation.md +++ b/docs/cellxgene_census_docsite_installation.md @@ -29,7 +29,7 @@ If installing in a Databricks notebook environment, use `%pip install`. Do not u ## R -From an R session, first install `tiledb` from R-Universe, the latest release in CRAN version is not yet available. +From an R session, first install `tiledb` from R-Universe, the latest release in CRAN is not yet available. ```r install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev") From 888b0d02e03ebb685b2ee898543ddd457cb36f91 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Wed, 2 Aug 2023 19:35:14 -0700 Subject: [PATCH 03/14] Editorial changes --- api/r/cellxgene.census/README.md | 4 +-- docs/cellxgene_census_docsite_quick_start.md | 36 ++++++++++---------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md index eeb6879bd..24831c8ad 100644 --- a/api/r/cellxgene.census/README.md +++ b/api/r/cellxgene.census/README.md @@ -7,9 +7,7 @@ The `cellxgene.census` package provides an API to facilitate the use of the CZ CELLxGENE Discover Census. 
For more information about the API and the project visit the [chanzuckerberg/cellxgene-census GitHub repo](https://github.com/chanzuckerberg/cellxgene-census/). -**Status**: Pre-release, under rapid development. Expect API changes. - -Also see the [Python API](https://cellxgene-census.readthedocs.io/). +Also see the [Python API](https://chanzuckerberg.github.io/cellxgene-census/). ## Installation diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md index cf19a86fc..15cd2a08c 100644 --- a/docs/cellxgene_census_docsite_quick_start.md +++ b/docs/cellxgene_census_docsite_quick_start.md @@ -49,23 +49,24 @@ with cellxgene_census.open_soma() as census: print(cell_metadata) ``` -The output is a `pandas.DataFrame` with about 300K cells meeting our query criteria and the selected columns. +The output is a `pandas.DataFrame` with over 300K cells meeting our query criteria and the selected columns. ```bash - assay cell_type tissue tissue_general suspension_type disease sex -0 10x 3' v3 microglial cell eye eye cell normal female -1 10x 3' v3 microglial cell eye eye cell normal female -2 10x 3' v3 microglial cell eye eye cell normal female -3 10x 3' v3 microglial cell eye eye cell normal female -4 10x 3' v3 microglial cell eye eye cell normal female -... ... ... ... ... ... ... ... -299617 10x 3' v3 neuron cerebral cortex brain nucleus normal female -299618 10x 3' v3 neuron cerebral cortex brain nucleus normal female -299619 10x 3' v3 neuron cerebral cortex brain nucleus normal female -299620 10x 3' v3 neuron cerebral cortex brain nucleus normal female -299621 10x 3' v3 neuron cerebral cortex brain nucleus normal female - -[299622 rows x 7 columns] +The "stable" release is currently 2023-07-25. Specify 'census_version="2023-07-25"' in future calls to open_soma() to ensure data consistency. 
+ assay cell_type tissue tissue_general suspension_type disease sex +0 10x 3' v3 microglial cell eye eye cell normal female +1 10x 3' v3 microglial cell eye eye cell normal female +2 10x 3' v3 microglial cell eye eye cell normal female +3 10x 3' v3 microglial cell eye eye cell normal female +4 10x 3' v3 microglial cell eye eye cell normal female +... ... ... ... ... ... ... ... +379219 microwell-seq neuron adrenal gland adrenal gland cell normal female +379220 microwell-seq neuron adrenal gland adrenal gland cell normal female +379221 microwell-seq neuron adrenal gland adrenal gland cell normal female +379222 microwell-seq neuron adrenal gland adrenal gland cell normal female +379223 microwell-seq neuron adrenal gland adrenal gland cell normal female + +[379224 rows x 7 columns] ``` ### Obtaining a slice as AnnData @@ -85,13 +86,12 @@ with cellxgene_census.open_soma() as census: ) print(adata) - ``` -The output with about 300K cells and 2 genes can be now used for downstream analysis using [scanpy](https://scanpy.readthedocs.io/en/stable/). +The output with over 300K cells and 2 genes can be now used for downstream analysis using [scanpy](https://scanpy.readthedocs.io/en/stable/). 
``` bash -AnnData object with n_obs × n_vars = 299622 × 2 +AnnData object with n_obs × n_vars = 379224 × 2 obs: 'assay', 'cell_type', 'tissue', 'tissue_general', 'suspension_type', 'disease', 'sex' var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length' ``` From e0c918717f7d7508d5b983f30fe1962be089b0a1 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 3 Aug 2023 11:53:05 -0700 Subject: [PATCH 04/14] Add docs --- api/r/cellxgene.census/README.md | 74 +++++++++++++++----- docs/cellxgene_census_docsite_quick_start.md | 2 +- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md index 24831c8ad..616aa316e 100644 --- a/api/r/cellxgene.census/README.md +++ b/api/r/cellxgene.census/README.md @@ -1,37 +1,79 @@ -# CZ CELLxGENE Discover Census +# R package of CZ CELLxGENE Discover Census +This is the documentation for the R package `cellxgene.census` which is part of Census by CZ CELLxGENE Discover. For full details on Census data and capabilities please go to the [main Census site](https://chanzuckerberg.github.io/cellxgene-census/). -The `cellxgene.census` package provides an API to facilitate the use of the CZ CELLxGENE Discover Census. For more information about the API and the project visit the [chanzuckerberg/cellxgene-census GitHub repo](https://github.com/chanzuckerberg/cellxgene-census/). +`cellxgene.census` provides an API to efficiently access the cloud-hosted Census single-cell data from R. In just a few seconds users can access any slice of Census data using cell or gene filters across hundreds of single-cell datasets. -Also see the [Python API](https://chanzuckerberg.github.io/cellxgene-census/). +Census data can be fetched in an iterative fashion for bigger-than-memory slices of data, or quickly exported to basic R structures, as well as `Seurat` or `SingleCellExperiment` objects for downstream analysis. 
## Installation -You can install the development version of `cellxgene.census` from [GitHub](https://github.com/) with: +From an R session, first install `tiledb` from R-Universe, the latest release in CRAN is not yet available. -``` r -# install.packages("devtools") -devtools::install_github("chanzuckerberg/cellxgene-census/api/r/cellxgene.census") -print(cellxgene.census::open_soma()) +```r +install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev") ``` -(minimal apt dependencies: r-base cmake git) +Then install `cellxgene.census` from R-Universe. -## Example +```r +install.packages("cellxgene.census", repos = "https://chanzuckerberg.r-universe.dev") +``` + +## Usage + +Check out the vignettes in the "Articles" section of the navigation bar on this site. We highly recommend the following vignettes as a starting point: + +* [Querying and fetching the single-cell data and cell/gene metadata](./articles/census_query_extract.html) +* [Learning about the CZ CELLxGENE Discover Census](./articles/comp_bio_census_info.html) + +You can also check out out the [quick start guide](https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_quick_start.html) in the main Census site. + + +### Example `Seurat` and `SingleCellExperiment` query + +The following creates an `Seurat` object on-demand with all sympathetic neurons in Census and filtering only for the genes `ENSG00000161798`, `ENSG00000188229`. 
+ +```r +library("cellxgene.census") +library("Seurat") + +census = open_soma() + +organism = "Homo sapiens" +gene_filter = "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')" +cell_filter = "cell_type == 'sympathetic neuron'" +cell_columns = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease") + +seurat_obj = get_seurat( + census = census, + organism = organism, + var_value_filter = gene_filter, + obs_value_filter = cell_filter, + obs_column_names = cell_columns +) +``` + +And the following retrieves the same data as a `SingleCellExperiment` object. -This is a basic example which shows you how to solve a common problem: +```r +library("SingleCellExperiment") -``` r -library(cellxgene.census) -## basic example code +sce_obj = get_single_cell_experiment( + census = census, + organism = organism, + var_value_filter = gene_filter, + obs_value_filter = cell_filter, + obs_column_names = cell_columns +) ``` ## For More Help -For more help, please file a issue on the repo, or contact us at +For more help, please go visit the [main Census site](https://chanzuckerberg.github.io/cellxgene-census/). -If you believe you have found a security issue, we would appreciate notification. Please send email to . +If you believe you have found a security issue, we would appreciate notification. Please send an email to . diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md index 15cd2a08c..ff9dfe8fe 100644 --- a/docs/cellxgene_census_docsite_quick_start.md +++ b/docs/cellxgene_census_docsite_quick_start.md @@ -88,7 +88,7 @@ with cellxgene_census.open_soma() as census: print(adata) ``` -The output with over 300K cells and 2 genes can be now used for downstream analysis using [scanpy](https://scanpy.readthedocs.io/en/stable/). +The output with about 300K cells and 2 genes can be now used for downstream analysis using [scanpy](https://scanpy.readthedocs.io/en/stable/). 
``` bash AnnData object with n_obs × n_vars = 379224 × 2 From 07dada73a2fc6c847c818d690cff00ea395e26bd Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 3 Aug 2023 13:00:21 -0700 Subject: [PATCH 05/14] Render R doc-site --- api/r/cellxgene.census/docs/index.html | 68 +++++++++++++++++++------ api/r/cellxgene.census/docs/pkgdown.yml | 2 +- api/r/cellxgene.census/docs/search.json | 2 +- 3 files changed, 54 insertions(+), 18 deletions(-) diff --git a/api/r/cellxgene.census/docs/index.html b/api/r/cellxgene.census/docs/index.html index ab08476f2..25922b658 100644 --- a/api/r/cellxgene.census/docs/index.html +++ b/api/r/cellxgene.census/docs/index.html @@ -85,34 +85,70 @@
-
-

Example +

Usage

-

This is a basic example which shows you how to solve a common problem:

-
-library(cellxgene.census)
-## basic example code
+

Check out the vignettes in the “Articles” section of the navigation bar on this site. We highly recommend the following vignettes as a starting point:

+ +

You can also check out the quick start guide in the main Census site.

+
+

Example Seurat and SingleCellExperiment query +

+

The following creates an Seurat object on-demand with all sympathetic neurons in Census and filtering only for the genes ENSG00000161798, ENSG00000188229.

+
+library("cellxgene.census")
+library("Seurat")
+
+census = open_soma()
+
+organism = "Homo sapiens"
+gene_filter = "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
+cell_filter =  "cell_type == 'sympathetic neuron'"
+cell_columns = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
+
+seurat_obj = get_seurat(
+   census = census,
+   organism = organism,
+   var_value_filter = gene_filter,
+   obs_value_filter = cell_filter,
+   obs_column_names = cell_columns
+)
+

And the following retrieves the same data as a SingleCellExperiment object.

+
+library("SingleCellExperiment")
+
+sce_obj = get_single_cell_experiment(
+   census = census,
+   organism = organism,
+   var_value_filter = gene_filter,
+   obs_value_filter = cell_filter,
+   obs_column_names = cell_columns
+)
+

For More Help

-

For more help, please file a issue on the repo, or contact us at

-

If you believe you have found a security issue, we would appreciate notification. Please send email to .

+

For more help, please visit the main Census site.

+

If you believe you have found a security issue, we would appreciate notification. Please send an email to .

Usage @@ -112,19 +121,19 @@

Usage

Example Seurat and SingleCellExperiment query

-

The following creates an Seurat object on-demand with all sympathetic neurons in Census and filtering only for the genes ENSG00000161798, ENSG00000188229.

+

The following creates a Seurat object on-demand with all sympathetic neurons in Census and filtering only for the genes ENSG00000107317, ENSG00000106034.

 library("cellxgene.census")
 library("Seurat")
 
-census = open_soma()
+census <- open_soma()
 
-organism = "Homo sapiens"
-gene_filter = "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
-cell_filter =  "cell_type == 'sympathetic neuron'"
-cell_columns = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
+organism <- "Homo sapiens"
+gene_filter <- "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
+cell_filter <-  "cell_type == 'sympathetic neuron'"
+cell_columns <- c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
 
-seurat_obj = get_seurat(
+seurat_obj <- get_seurat(
    census = census,
    organism = organism,
    var_value_filter = gene_filter,
@@ -135,7 +144,7 @@ 

Example Seurat
 library("SingleCellExperiment")
 
-sce_obj = get_single_cell_experiment(
+sce_obj <- get_single_cell_experiment(
    census = census,
    organism = organism,
    var_value_filter = gene_filter,
diff --git a/api/r/cellxgene.census/docs/pkgdown.yml b/api/r/cellxgene.census/docs/pkgdown.yml
index 8bb89f2ff..23f1b0ad1 100644
--- a/api/r/cellxgene.census/docs/pkgdown.yml
+++ b/api/r/cellxgene.census/docs/pkgdown.yml
@@ -8,5 +8,5 @@ articles:
   comp_bio_census_info: comp_bio_census_info.html
   comp_bio_data_integration: comp_bio_data_integration.html
   comp_bio_summarize_axis_query: comp_bio_summarize_axis_query.html
-last_built: 2023-08-03T22:40Z
+last_built: 2023-08-04T19:50Z
 
diff --git a/api/r/cellxgene.census/docs/reference/index.html b/api/r/cellxgene.census/docs/reference/index.html
index b8db002f0..b91e62bf8 100644
--- a/api/r/cellxgene.census/docs/reference/index.html
+++ b/api/r/cellxgene.census/docs/reference/index.html
@@ -10,7 +10,7 @@
     
     cellxgene.census
 
-    0.0.0.9000
+    0.0.9.9