From ac2ff6e14d8a72d61b7cc1f7686e4c822db02875 Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Wed, 2 Aug 2023 15:47:17 -0700
Subject: [PATCH 01/14] update docs

---
 docs/cellxgene_census_docsite_installation.md |  11 +-
 docs/cellxgene_census_docsite_quick_start.md  | 138 ++++++++++++++----
 2 files changed, 119 insertions(+), 30 deletions(-)
diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index b10a57633..f5b448b0e 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -29,11 +29,14 @@ If installing in a Databricks notebook environment, use `%pip install`. Do not u
 
 ## R
 
-The R package will be soon deposited into R-Universe. In the meantime you can directly install from github using the [devtools](https://devtools.r-lib.org/) R package.
+From an R session, first install `tiledb` from R-Universe, the latest release in CRAN version is not yet available.
 
-From an R session:
+```r
+install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev")
+```
+
+Then install `cellxgene.census` from R-Universe.
 
 ```r
-install.packages("devtools")
-devtools::install_github("chanzuckerberg/cellxgene-census/api/r/cellxgene.census")
+install.packages("cellxgene.census", repos = "https://chanzuckerberg.r-universe.dev")
 ```
diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md
index d140373e8..cf19a86fc 100644
--- a/docs/cellxgene_census_docsite_quick_start.md
+++ b/docs/cellxgene_census_docsite_quick_start.md
@@ -142,7 +142,6 @@ And you must close the query.
 
 ## R quick start
 
-❗ **API is in beta and under rapid development.**
 
 Below are 3 examples of common operations you can do with the Census. As a reminder, the reference documentation for the API can be accessed via `?`:
 
@@ -172,61 +171,148 @@ cell_metadata = cell_metadata$read(
    column_names = c("assay", "cell_type", "sex", "tissue", "tissue_general", "suspension_type", "disease")
 )
 
+# Concatenates results to an Arrow Table
+cell_metadata = cell_metadata$concat()
+
 # Convert to R tibble (dataframe)
 cell_metadata = as.data.frame(cell_metadata)
 
 print(cell_metadata)
+
+census$close()
 ```
 
-The output is a `tibble` with about 300K cells meeting our query criteria and the selected columns.
+The output is a `tibble` with over 300K cells meeting our query criteria and the selected columns.
 
 ```bash
-# A tibble: 305,735 × 7
-   assay     cell_type sex    tissue tissue_general suspension_type disease
-   <chr>     <chr>     <chr>  <chr>  <chr>          <chr>           <chr>  
- 1 10x 3' v3 neuron    female lung   lung           nucleus         normal 
- 2 10x 3' v3 neuron    female lung   lung           nucleus         normal 
- 3 10x 3' v3 neuron    female lung   lung           nucleus         normal 
- 4 10x 3' v3 neuron    female lung   lung           nucleus         normal 
- 5 10x 3' v3 neuron    female lung   lung           nucleus         normal 
- 6 10x 3' v3 neuron    female lung   lung           nucleus         normal 
- 7 10x 3' v3 neuron    female lung   lung           nucleus         normal 
- 8 10x 3' v3 neuron    female lung   lung           nucleus         normal 
- 9 10x 3' v3 neuron    female lung   lung           nucleus         normal 
-10 10x 3' v3 neuron    female lung   lung           nucleus         normal 
-# ℹ 305,725 more rows
+# A tibble: 379,224 × 7
+   assay     cell_type       sex   tissue tissue_general suspension_type disease
+   <chr>     <chr>           <chr> <chr>  <chr>          <chr>           <chr>  
+ 1 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+ 2 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+ 3 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+ 4 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+ 5 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+ 6 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+ 7 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+ 8 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+ 9 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+10 10x 3' v3 microglial cell fema… eye    eye            cell            normal 
+# ℹ 379,214 more rows
 # ℹ Use `print(n = ...)` to see more rows
 ```
 
-### Obtaining a slice as a Seurat object 
+### Obtaining a slice as a `Seurat` or `SingleCellExperiment` object 
 
-The following creates an Seurat object on-demand with the smaller set of cells  and filtering only the genes `ENSG00000161798`, `ENSG00000188229`.
+The following creates a Seurat object on-demand with all sympathetic neurons in Census, keeping only the genes `ENSG00000107317` and `ENSG00000106034`.
 
-```python
+```r
 library("cellxgene.census")
+library("Seurat")
 
 census = open_soma()
 
+organism = "Homo sapiens"
+gene_filter = "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
+cell_filter =  "cell_type == 'sympathetic neuron'"
+cell_columns = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
+
 seurat_obj = get_seurat(
    census = census,
-   organism = "Homo sapiens",
-   var_value_filter = "feature_id %in% c('ENSG00000161798', 'ENSG00000188229')",
-   obs_value_filter = "sex == 'female' & cell_type %in% c('microglial cell', 'neuron')",
-   obs_column_names = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
+   organism = organism,
+   var_value_filter = gene_filter,
+   obs_value_filter = cell_filter,
+   obs_column_names = cell_columns
 )
 
 print(seurat_obj)
 ```
 
-The output with about 5K cells and 2 genes can be now used for downstream analysis using [Seurat](https://satijalab.org/seurat/).
+The output with over 4K cells and 2 genes can now be used for downstream analysis using [Seurat](https://satijalab.org/seurat/).
 
 ``` shell
 An object of class Seurat 
-2 features across 5876 samples within 1 assay 
+2 features across 4744 samples within 1 assay 
 Active assay: RNA (2 features, 0 variable features)
 ```
 
+Similarly a `SingleCellExperiment` object can be created.
+
+```r
+library("SingleCellExperiment")
+
+sce_obj = get_single_cell_experiment(
+   census = census,
+   organism = organism,
+   var_value_filter = gene_filter,
+   obs_value_filter = cell_filter,
+   obs_column_names = cell_columns
+)
+
+print(sce_obj)
+```
+
+The output with over 4K cells and 2 genes can now be used for downstream analysis using the [Bioconductor ecosystem](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html).
+
+``` shell
+class: SingleCellExperiment 
+dim: 2 4744 
+metadata(0):
+assays(1): counts
+rownames(2): ENSG00000106034 ENSG00000107317
+rowData names(2): feature_name feature_length
+colnames(4744): obs48350835 obs48351829 ... obs52469564 obs52470190
+colData names(6): assay cell_type ... suspension_type disease
+reducedDimNames(0):
+mainExpName: RNA
+altExpNames(0):
+```
+
+
 ### Memory-efficient queries
 
-Memory-efficient capabilities of the R API are still under active development. 
+This example demonstrates how to access the data for larger-than-memory operations using **TileDB-SOMA**.
+
+First we initiate a lazy-evaluation query to access all male brain cells from human. This query needs to be closed afterwards with `query$close()`.
+
+```r
+library("cellxgene.census")
+library("tiledbsoma")
+census = open_soma()
+human = census$get("census_data")$get("homo_sapiens")
+query = human$axis_query(
+  measurement_name = "RNA",
+  obs_query = SOMAAxisQuery$new(
+    value_filter = "tissue == 'brain' & sex == 'male'"
+  )
+)
+   
+# Continued below
+
+
+```
+
+Now we can iterate over the count matrix, as well as the cell and gene metadata. For example, to iterate over the count matrix, we can get an iterator and perform operations for each iteration.
+
+```r
+# Continued from above 
+
+iterator = query$X("raw")$tables()
+# For sparse matrices use query$X("raw")$sparse_matrix()
+
+# Get an iterative slice as an Arrow Table
+raw_slice = iterator$read_next() 
+
+#...
+```
+
+And you can now perform operations on each iteration slice. This logic can be wrapped in a `while()` loop that checks the iteration state by monitoring the logical output of `iterator$read_complete()`.
+
+And you must close the query and census.
+
+```r
+# Continued from above
+query$close()
+census$close()
+```
 

From 220fb9db3adeb7f6f26c09791cd5287584a8da23 Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Wed, 2 Aug 2023 15:50:33 -0700
Subject: [PATCH 02/14] Editorial changes

---
 docs/cellxgene_census_docsite_installation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index f5b448b0e..44a9de9cb 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -29,7 +29,7 @@ If installing in a Databricks notebook environment, use `%pip install`. Do not u
 
 ## R
 
-From an R session, first install `tiledb` from R-Universe, the latest release in CRAN version is not yet available.
+From an R session, first install `tiledb` from R-Universe, as the latest release is not yet available on CRAN.
 
 ```r
 install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev")

From 888b0d02e03ebb685b2ee898543ddd457cb36f91 Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Wed, 2 Aug 2023 19:35:14 -0700
Subject: [PATCH 03/14] Editorial changes

---
 api/r/cellxgene.census/README.md             |  4 +--
 docs/cellxgene_census_docsite_quick_start.md | 36 ++++++++++----------
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index eeb6879bd..24831c8ad 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -7,9 +7,7 @@
 
 The `cellxgene.census` package provides an API to facilitate the use of the CZ CELLxGENE Discover Census. For more information about the API and the project visit the [chanzuckerberg/cellxgene-census GitHub repo](https://github.com/chanzuckerberg/cellxgene-census/).
 
-**Status**: Pre-release, under rapid development. Expect API changes.
-
-Also see the [Python API](https://cellxgene-census.readthedocs.io/).
+Also see the [Python API](https://chanzuckerberg.github.io/cellxgene-census/).
 
 ## Installation
 
diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md
index cf19a86fc..15cd2a08c 100644
--- a/docs/cellxgene_census_docsite_quick_start.md
+++ b/docs/cellxgene_census_docsite_quick_start.md
@@ -49,23 +49,24 @@ with cellxgene_census.open_soma() as census:
     print(cell_metadata)
 ```
 
-The output is a `pandas.DataFrame` with about 300K cells meeting our query criteria and the selected columns.
+The output is a `pandas.DataFrame` with over 300K cells meeting our query criteria and the selected columns.
 
 ```bash
-            assay        cell_type           tissue tissue_general suspension_type disease     sex
-0       10x 3' v3  microglial cell              eye            eye            cell  normal  female
-1       10x 3' v3  microglial cell              eye            eye            cell  normal  female
-2       10x 3' v3  microglial cell              eye            eye            cell  normal  female
-3       10x 3' v3  microglial cell              eye            eye            cell  normal  female
-4       10x 3' v3  microglial cell              eye            eye            cell  normal  female
-...           ...              ...              ...            ...             ...     ...     ...
-299617  10x 3' v3           neuron  cerebral cortex          brain         nucleus  normal  female
-299618  10x 3' v3           neuron  cerebral cortex          brain         nucleus  normal  female
-299619  10x 3' v3           neuron  cerebral cortex          brain         nucleus  normal  female
-299620  10x 3' v3           neuron  cerebral cortex          brain         nucleus  normal  female
-299621  10x 3' v3           neuron  cerebral cortex          brain         nucleus  normal  female
-
-[299622 rows x 7 columns]
+The "stable" release is currently 2023-07-25. Specify 'census_version="2023-07-25"' in future calls to open_soma() to ensure data consistency.
+                assay        cell_type         tissue tissue_general suspension_type disease     sex
+0           10x 3' v3  microglial cell            eye            eye            cell  normal  female
+1           10x 3' v3  microglial cell            eye            eye            cell  normal  female
+2           10x 3' v3  microglial cell            eye            eye            cell  normal  female
+3           10x 3' v3  microglial cell            eye            eye            cell  normal  female
+4           10x 3' v3  microglial cell            eye            eye            cell  normal  female
+...               ...              ...            ...            ...             ...     ...     ...
+379219  microwell-seq           neuron  adrenal gland  adrenal gland            cell  normal  female
+379220  microwell-seq           neuron  adrenal gland  adrenal gland            cell  normal  female
+379221  microwell-seq           neuron  adrenal gland  adrenal gland            cell  normal  female
+379222  microwell-seq           neuron  adrenal gland  adrenal gland            cell  normal  female
+379223  microwell-seq           neuron  adrenal gland  adrenal gland            cell  normal  female
+
+[379224 rows x 7 columns]
 ```
 
 ### Obtaining a slice as AnnData 
@@ -85,13 +86,12 @@ with cellxgene_census.open_soma() as census:
     )
     
     print(adata)
-    
 ```
 
-The output with about 300K cells and 2 genes can be now used for downstream analysis using [scanpy](https://scanpy.readthedocs.io/en/stable/).
+The output with over 300K cells and 2 genes can be now used for downstream analysis using [scanpy](https://scanpy.readthedocs.io/en/stable/).
 
 ``` bash
-AnnData object with n_obs × n_vars = 299622 × 2
+AnnData object with n_obs × n_vars = 379224 × 2
     obs: 'assay', 'cell_type', 'tissue', 'tissue_general', 'suspension_type', 'disease', 'sex'
     var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length'
 ```

From e0c918717f7d7508d5b983f30fe1962be089b0a1 Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Thu, 3 Aug 2023 11:53:05 -0700
Subject: [PATCH 04/14] Add docs

---
 api/r/cellxgene.census/README.md             | 74 +++++++++++++++-----
 docs/cellxgene_census_docsite_quick_start.md |  2 +-
 2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index 24831c8ad..616aa316e 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -1,37 +1,79 @@
 
-# CZ CELLxGENE Discover Census
+# R package of CZ CELLxGENE Discover Census
 
 <!-- badges: start -->
 <!-- badges: end -->
 
+This is the documentation for the R package `cellxgene.census` which is part of Census by CZ CELLxGENE Discover. For full details on Census data and capabilities please go to the [main Census site](https://chanzuckerberg.github.io/cellxgene-census/).  
 
-The `cellxgene.census` package provides an API to facilitate the use of the CZ CELLxGENE Discover Census. For more information about the API and the project visit the [chanzuckerberg/cellxgene-census GitHub repo](https://github.com/chanzuckerberg/cellxgene-census/).
+`cellxgene.census` provides an API to efficiently access the cloud-hosted Census single-cell data from R. In just a few seconds users can access any slice of Census data using cell or gene filters across hundreds of single-cell datasets. 
 
-Also see the [Python API](https://chanzuckerberg.github.io/cellxgene-census/).
+Census data can be fetched in an iterative fashion for bigger-than-memory slices of data, or quickly exported to basic R structures, as well as `Seurat` or `SingleCellExperiment` objects for downstream analysis.
 
 ## Installation
 
-You can install the development version of `cellxgene.census` from [GitHub](https://github.com/) with:
+From an R session, first install `tiledb` from R-Universe, as the latest release is not yet available on CRAN.
 
-``` r
-# install.packages("devtools")
-devtools::install_github("chanzuckerberg/cellxgene-census/api/r/cellxgene.census")
-print(cellxgene.census::open_soma())
+```r
+install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev")
 ```
 
-(minimal apt dependencies: r-base cmake git)
+Then install `cellxgene.census` from R-Universe.
 
-## Example
+```r
+install.packages("cellxgene.census", repos = "https://chanzuckerberg.r-universe.dev")
+```
+
+## Usage
+
+Check out the vignettes in the "Articles" section of the navigation bar on this site. We highly recommend the following vignettes as a starting point:
+
+* [Querying and fetching the single-cell data and cell/gene metadata](./articles/census_query_extract.html)
+* [Learning about the CZ CELLxGENE Discover Census](./articles/comp_bio_census_info.html)
+
+You can also check out the [quick start guide](https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_quick_start.html) in the main Census site.
+
+
+### Example `Seurat` and `SingleCellExperiment` query
+
+The following creates a `Seurat` object on-demand with all sympathetic neurons in Census and filtering only for the genes `ENSG00000107317`, `ENSG00000106034`.
+
+```r
+library("cellxgene.census")
+library("Seurat")
+
+census = open_soma()
+
+organism = "Homo sapiens"
+gene_filter = "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
+cell_filter =  "cell_type == 'sympathetic neuron'"
+cell_columns = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
+
+seurat_obj = get_seurat(
+   census = census,
+   organism = organism,
+   var_value_filter = gene_filter,
+   obs_value_filter = cell_filter,
+   obs_column_names = cell_columns
+)
+```
+
+And the following retrieves the same data as a `SingleCellExperiment` object.
 
-This is a basic example which shows you how to solve a common problem:
+```r
+library("SingleCellExperiment")
 
-``` r
-library(cellxgene.census)
-## basic example code
+sce_obj = get_single_cell_experiment(
+   census = census,
+   organism = organism,
+   var_value_filter = gene_filter,
+   obs_value_filter = cell_filter,
+   obs_column_names = cell_columns
+)
 ```
 
 ## For More Help
 
-For more help, please file a issue on the repo, or contact us at <soma@chanzuckerberg.com>
+For more help, please visit the [main Census site](https://chanzuckerberg.github.io/cellxgene-census/). 
 
-If you believe you have found a security issue, we would appreciate notification. Please send email to <security@chanzuckerberg.com>.
+If you believe you have found a security issue, we would appreciate notification. Please send an email to <security@chanzuckerberg.com>.
diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md
index 15cd2a08c..ff9dfe8fe 100644
--- a/docs/cellxgene_census_docsite_quick_start.md
+++ b/docs/cellxgene_census_docsite_quick_start.md
@@ -88,7 +88,7 @@ with cellxgene_census.open_soma() as census:
     print(adata)
 ```
 
-The output with over 300K cells and 2 genes can be now used for downstream analysis using [scanpy](https://scanpy.readthedocs.io/en/stable/).
+The output with about 300K cells and 2 genes can now be used for downstream analysis using [scanpy](https://scanpy.readthedocs.io/en/stable/).
 
 ``` bash
 AnnData object with n_obs × n_vars = 379224 × 2

From 07dada73a2fc6c847c818d690cff00ea395e26bd Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Thu, 3 Aug 2023 13:00:21 -0700
Subject: [PATCH 05/14] Render R doc-site

---
 api/r/cellxgene.census/docs/index.html  | 68 +++++++++++++++++++------
 api/r/cellxgene.census/docs/pkgdown.yml |  2 +-
 api/r/cellxgene.census/docs/search.json |  2 +-
 3 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/api/r/cellxgene.census/docs/index.html b/api/r/cellxgene.census/docs/index.html
index ab08476f2..25922b658 100644
--- a/api/r/cellxgene.census/docs/index.html
+++ b/api/r/cellxgene.census/docs/index.html
@@ -85,34 +85,70 @@ <h6 class="dropdown-header" data-toc-skip>cellxgene.census capabilities</h6>
 </nav><div class="container template-home">
 <div class="row">
   <main id="main" class="col-md-9"><div class="section level1">
-<div class="page-header"><h1 id="cz-cellxgene-discover-census">CZ CELLxGENE Discover Census<a class="anchor" aria-label="anchor" href="#cz-cellxgene-discover-census"></a>
+<div class="page-header"><h1 id="r-package-of-cz-cellxgene-discover-census">R package of CZ CELLxGENE Discover Census<a class="anchor" aria-label="anchor" href="#r-package-of-cz-cellxgene-discover-census"></a>
 </h1></div>
-<!-- badges: start --><!-- badges: end --><p>The <code>cellxgene.census</code> package provides an API to facilitate the use of the CZ CELLxGENE Discover Census. For more information about the API and the project visit the <a href="https://github.com/chanzuckerberg/cellxgene-census/" class="external-link">chanzuckerberg/cellxgene-census GitHub repo</a>.</p>
-<p><strong>Status</strong>: Pre-release, under rapid development. Expect API changes.</p>
-<p>Also see the <a href="https://cellxgene-census.readthedocs.io/" class="external-link">Python API</a>.</p>
+<!-- badges: start --><!-- badges: end --><p>This is the documentation for the R package <code>cellxgene.census</code> which is part of Census by CZ CELLxGENE Discover. For full details on Census data and capabilities please go to the <a href="https://chanzuckerberg.github.io/cellxgene-census/" class="external-link">main Census site</a>.</p>
+<p><code>cellxgene.census</code> provides an API to efficiently access the cloud-hosted Census single-cell data from R. In just a few seconds users can access any slice of Census data using cell or gene filters across hundreds of single-cell datasets.</p>
+<p>Census data can be fetched in an iterative fashion for bigger-than-memory slices of data, or quickly exported to basic R structures, as well as <code>Seurat</code> or <code>SingleCellExperiment</code> objects for downstream analysis.</p>
 <div class="section level2">
 <h2 id="installation">Installation<a class="anchor" aria-label="anchor" href="#installation"></a>
 </h2>
-<p>You can install the development version of <code>cellxgene.census</code> from <a href="https://github.com/" class="external-link">GitHub</a> with:</p>
+<p>From an R session, first install <code>tiledb</code> from R-Universe, as the latest release is not yet available on CRAN.</p>
 <div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
-<code class="sourceCode R"><span><span class="co"># install.packages("devtools")</span></span>
-<span><span class="fu">devtools</span><span class="fu">::</span><span class="fu"><a href="https://remotes.r-lib.org/reference/install_github.html" class="external-link">install_github</a></span><span class="op">(</span><span class="st">"chanzuckerberg/cellxgene-census/api/r/cellxgene.census"</span><span class="op">)</span></span>
-<span><span class="fu"><a href="https://rdrr.io/r/base/print.html" class="external-link">print</a></span><span class="op">(</span><span class="fu">cellxgene.census</span><span class="fu">::</span><span class="fu"><a href="reference/open_soma.html">open_soma</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
-<p>(minimal apt dependencies: r-base cmake git)</p>
+<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html" class="external-link">install.packages</a></span><span class="op">(</span><span class="st">"tiledb"</span>, repos <span class="op">=</span> <span class="st">"https://tiledb-inc.r-universe.dev"</span><span class="op">)</span></span></code></pre></div>
+<p>Then install <code>cellxgene.census</code> from R-Universe.</p>
+<div class="sourceCode" id="cb2"><pre class="downlit sourceCode r">
+<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html" class="external-link">install.packages</a></span><span class="op">(</span><span class="st">"cellxgene.census"</span>, repos <span class="op">=</span> <span class="st">"https://chanzuckerberg.r-universe.dev"</span><span class="op">)</span></span></code></pre></div>
 </div>
 <div class="section level2">
-<h2 id="example">Example<a class="anchor" aria-label="anchor" href="#example"></a>
+<h2 id="usage">Usage<a class="anchor" aria-label="anchor" href="#usage"></a>
 </h2>
-<p>This is a basic example which shows you how to solve a common problem:</p>
-<div class="sourceCode" id="cb2"><pre class="downlit sourceCode r">
-<code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://github.com/chanzuckerberg/cellxgene-census" class="external-link">cellxgene.census</a></span><span class="op">)</span></span>
-<span><span class="co">## basic example code</span></span></code></pre></div>
+<p>Check out the vignettes in the “Articles” section of the navigation bar on this site. We highly recommend the following vignettes as a starting point:</p>
+<ul>
+<li><a href="./articles/census_query_extract.html">Querying and fetching the single-cell data and cell/gene metadata</a></li>
+<li><a href="./articles/comp_bio_census_info.html">Learning about the CZ CELLxGENE Discover Census</a></li>
+</ul>
+<p>You can also check out the <a href="https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_quick_start.html" class="external-link">quick start guide</a> in the main Census site.</p>
+<div class="section level3">
+<h3 id="example-seurat-and-singlecellexperiment-query">Example <code>Seurat</code> and <code>SingleCellExperiment</code> query<a class="anchor" aria-label="anchor" href="#example-seurat-and-singlecellexperiment-query"></a>
+</h3>
+<p>The following creates a <code>Seurat</code> object on-demand with all sympathetic neurons in Census and filtering only for the genes <code>ENSG00000107317</code>, <code>ENSG00000106034</code>.</p>
+<div class="sourceCode" id="cb3"><pre class="downlit sourceCode r">
+<code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="st"><a href="https://github.com/chanzuckerberg/cellxgene-census" class="external-link">"cellxgene.census"</a></span><span class="op">)</span></span>
+<span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="st"><a href="https://satijalab.org/seurat" class="external-link">"Seurat"</a></span><span class="op">)</span></span>
+<span></span>
+<span><span class="va">census</span> <span class="op">=</span> <span class="fu"><a href="reference/open_soma.html">open_soma</a></span><span class="op">(</span><span class="op">)</span></span>
+<span></span>
+<span><span class="va">organism</span> <span class="op">=</span> <span class="st">"Homo sapiens"</span></span>
+<span><span class="va">gene_filter</span> <span class="op">=</span> <span class="st">"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"</span></span>
+<span><span class="va">cell_filter</span> <span class="op">=</span>  <span class="st">"cell_type == 'sympathetic neuron'"</span></span>
+<span><span class="va">cell_columns</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"assay"</span>, <span class="st">"cell_type"</span>, <span class="st">"tissue"</span>, <span class="st">"tissue_general"</span>, <span class="st">"suspension_type"</span>, <span class="st">"disease"</span><span class="op">)</span></span>
+<span></span>
+<span><span class="va">seurat_obj</span> <span class="op">=</span> <span class="fu"><a href="reference/get_seurat.html">get_seurat</a></span><span class="op">(</span></span>
+<span>   census <span class="op">=</span> <span class="va">census</span>,</span>
+<span>   organism <span class="op">=</span> <span class="va">organism</span>,</span>
+<span>   var_value_filter <span class="op">=</span> <span class="va">gene_filter</span>,</span>
+<span>   obs_value_filter <span class="op">=</span> <span class="va">cell_filter</span>,</span>
+<span>   obs_column_names <span class="op">=</span> <span class="va">cell_columns</span></span>
+<span><span class="op">)</span></span></code></pre></div>
+<p>And the following retrieves the same data as a <code>SingleCellExperiment</code> object.</p>
+<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
+<code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="st">"SingleCellExperiment"</span><span class="op">)</span></span>
+<span></span>
+<span><span class="va">sce_obj</span> <span class="op">=</span> <span class="fu"><a href="reference/get_single_cell_experiment.html">get_single_cell_experiment</a></span><span class="op">(</span></span>
+<span>   census <span class="op">=</span> <span class="va">census</span>,</span>
+<span>   organism <span class="op">=</span> <span class="va">organism</span>,</span>
+<span>   var_value_filter <span class="op">=</span> <span class="va">gene_filter</span>,</span>
+<span>   obs_value_filter <span class="op">=</span> <span class="va">cell_filter</span>,</span>
+<span>   obs_column_names <span class="op">=</span> <span class="va">cell_columns</span></span>
+<span><span class="op">)</span></span></code></pre></div>
+</div>
 </div>
 <div class="section level2">
 <h2 id="for-more-help">For More Help<a class="anchor" aria-label="anchor" href="#for-more-help"></a>
 </h2>
-<p>For more help, please file a issue on the repo, or contact us at <a href="mailto:soma@chanzuckerberg.com" class="email">soma@chanzuckerberg.com</a></p>
-<p>If you believe you have found a security issue, we would appreciate notification. Please send email to <a href="mailto:security@chanzuckerberg.com" class="email">security@chanzuckerberg.com</a>.</p>
+<p>For more help, please visit the <a href="https://chanzuckerberg.github.io/cellxgene-census/" class="external-link">main Census site</a>.</p>
+<p>If you believe you have found a security issue, we would appreciate notification. Please send an email to <a href="mailto:security@chanzuckerberg.com" class="email">security@chanzuckerberg.com</a>.</p>
 </div>
 </div>
   </main><aside class="col-md-3"><div class="links">
diff --git a/api/r/cellxgene.census/docs/pkgdown.yml b/api/r/cellxgene.census/docs/pkgdown.yml
index 71070d8a0..7773c6672 100644
--- a/api/r/cellxgene.census/docs/pkgdown.yml
+++ b/api/r/cellxgene.census/docs/pkgdown.yml
@@ -8,5 +8,5 @@ articles:
   comp_bio_census_info: comp_bio_census_info.html
   comp_bio_data_integration: comp_bio_data_integration.html
   comp_bio_summarize_axis_query: comp_bio_summarize_axis_query.html
-last_built: 2023-08-02T19:37Z
+last_built: 2023-08-03T18:54Z
 
diff --git a/api/r/cellxgene.census/docs/search.json b/api/r/cellxgene.census/docs/search.json
index ca11875f5..cad4f3ad8 100644
--- a/api/r/cellxgene.census/docs/search.json
+++ b/api/r/cellxgene.census/docs/search.json
@@ -1 +1 @@
-[{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 Chan Zuckerberg Initiative Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"/articles/census_dataset_presence.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Genes measured in each cell (dataset presence matrix)","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version).","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-ids-of-the-census-datasets","dir":"Articles","previous_headings":"","what":"Fetching the IDs of the Census datasets","title":"Genes measured in each cell (dataset presence matrix)","text":"Let’s grab table datasets included Census use table combination presence matrix .","code":"# Grab the experiment containing human data, and the measurement therein with RNA human <- census$get(\"census_data\")$get(\"homo_sapiens\") human_rna <- human$ms$get(\"RNA\")  # The census-wide datasets datasets_df <- as.data.frame(census$get(\"census_info\")$get(\"datasets\")$read()$concat()) print(datasets_df) #> # A tibble: 593 × 8 #>    soma_joinid collection_id     collection_name collection_doi dataset_id dataset_title #>          <int> <chr>             <chr>           <chr>          <chr>      <chr>         #>  1           0 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… f171db61-… donor_p13_tr… #>  2           1 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… ecf2e08e-… All donors t… #>  3           2 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… 74cff64f-… All donors a… #>  4           3 f7cecffa-00b4-45… Mapping single… 10.1016/j.cce… 5af90777-… Single-cell … #>  5           4 3f50314f-bdc9-40… Single-cell se… 10.1016/j.cce… bd65a70f-… Single-cell … #>  6           5 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… ff45e623-… Tabula Sapie… #>  7           6 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… f01bdd17-… Tabula Sapie… #>  8           7 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e6a11140-… Tabula Sapie… #>  9           8 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e5c63d94-… Tabula Sapie… #> 10           9 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… d8732da6-… Tabula Sapie… #> # ℹ 583 more rows #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count 
<int>"},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-dataset-presence-matrix","dir":"Articles","previous_headings":"","what":"Fetching the dataset presence matrix","title":"Genes measured in each cell (dataset presence matrix)","text":"Now let’s fetch dataset presence matrix. convenience, read entire presence matrix (Homo sapiens) sparse matrix. convenience API providing capability: also need var dataframe, read R data frame convenient manipulation:","code":"presence_matrix <- cellxgene.census::get_presence_matrix(census, \"Homo sapiens\", \"RNA\") print(dim(presence_matrix)) #> NULL var_df <- as.data.frame(human_rna$var$read()$concat()) print(var_df) #> # A tibble: 60,664 × 4 #>    soma_joinid feature_id      feature_name feature_length #>          <int> <chr>           <chr>                 <int> #>  1           0 ENSG00000121410 A1BG                   3999 #>  2           1 ENSG00000268895 A1BG-AS1               3374 #>  3           2 ENSG00000148584 A1CF                   9603 #>  4           3 ENSG00000175899 A2M                    6318 #>  5           4 ENSG00000245105 A2M-AS1                2948 #>  6           5 ENSG00000166535 A2ML1                  7156 #>  7           6 ENSG00000256661 A2ML1-AS1               452 #>  8           7 ENSG00000184389 A3GALT2                1023 #>  9           8 ENSG00000128274 A4GALT                 3358 #> 10           9 ENSG00000118017 A4GNT                  1779 #> # ℹ 60,654 more rows"},{"path":"/articles/census_dataset_presence.html","id":"identifying-genes-measured-in-a-specific-dataset","dir":"Articles","previous_headings":"","what":"Identifying genes measured in a specific dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Now dataset table, genes metadata table, dataset presence matrix, can check gene set genes measured specific dataset. Important: presence matrix indexed soma_joinid, positionally indexed. 
words: first dimension presence matrix dataset’s soma_joinid, stored census_datasets dataframe. second dimension presence matrix feature’s soma_joinid, stored var dataframe. Let’s find gene \"ENSG00000286096\" measured dataset id \"97a17473-e2b1-4f31-a544-44a60773e2dd\".","code":"var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"] dataset_joinid <- datasets_df$soma_joinid[datasets_df$dataset_id == \"97a17473-e2b1-4f31-a544-44a60773e2dd\"] is_present <- presence_matrix$take(dataset_joinid, var_joinid)$get_one_based_matrix()[, , drop = TRUE] cat(paste(\"Feature is\", if (is_present) \"present.\" else \"not present.\")) #> Feature is present."},{"path":"/articles/census_dataset_presence.html","id":"identifying-datasets-that-measured-specific-genes","dir":"Articles","previous_headings":"","what":"Identifying datasets that measured specific genes","title":"Genes measured in each cell (dataset presence matrix)","text":"Similarly, can determine datasets measured specific gene set genes.","code":"# Grab the feature's soma_joinid from the var dataframe var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"]  # The presence matrix is indexed by the joinids of the dataset and var dataframes, # so slice out the feature of interest by its joinid. 
dataset_joinids <- datasets_df$soma_joinid[   presence_matrix$take(j = var_joinid)$get_one_based_matrix()[, , drop = TRUE] != 0 ]  # From the datasets dataframe, slice out the datasets which have a joinid in the list print(datasets_df[dataset_joinids, ]) #> # A tibble: 42 × 8 #>    soma_joinid collection_id     collection_name collection_doi dataset_id dataset_title #>          <int> <chr>             <chr>           <chr>          <chr>      <chr>         #>  1           4 3f50314f-bdc9-40… Single-cell se… 10.1016/j.cce… bd65a70f-… Single-cell … #>  2           5 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… ff45e623-… Tabula Sapie… #>  3           6 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… f01bdd17-… Tabula Sapie… #>  4           7 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e6a11140-… Tabula Sapie… #>  5           8 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e5c63d94-… Tabula Sapie… #>  6          10 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… d77ec7d6-… Tabula Sapie… #>  7          11 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… cee11228-… Tabula Sapie… #>  8          13 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… a2d4d33e-… Tabula Sapie… #>  9          14 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… a0754256-… Tabula Sapie… #> 10          18 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… 6d41668c-… Tabula Sapie… #> # ℹ 32 more rows #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count <int>"},{"path":"/articles/census_dataset_presence.html","id":"identifying-all-genes-measured-in-a-dataset","dir":"Articles","previous_headings":"","what":"Identifying all genes measured in a dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Finally, can find set genes measured cells given dataset.","code":"# Slice the dataset(s) of interest, and get the joinid(s) dataset_joinids <- datasets_df$soma_joinid[datasets_df$collection_id == \"17481d16-ee44-49e5-bcf0-28c0780d8c4a\"]  # Slice 
the presence matrix by the first dimension, i.e., by dataset var_joinids <- var_df$soma_joinid[   which(Matrix::colSums(presence_matrix$take(i = dataset_joinids)$get_one_based_matrix()[, , drop = TRUE]) > 0) ]  print(var_df[var_joinids, ]) #> # A tibble: 27,210 × 4 #>    soma_joinid feature_id      feature_name feature_length #>          <int> <chr>           <chr>                 <int> #>  1           0 ENSG00000121410 A1BG                   3999 #>  2           1 ENSG00000268895 A1BG-AS1               3374 #>  3           2 ENSG00000148584 A1CF                   9603 #>  4           3 ENSG00000175899 A2M                    6318 #>  5           4 ENSG00000245105 A2M-AS1                2948 #>  6           5 ENSG00000166535 A2ML1                  7156 #>  7           7 ENSG00000184389 A3GALT2                1023 #>  8           8 ENSG00000128274 A4GALT                 3358 #>  9           9 ENSG00000118017 A4GNT                  1779 #> 10          10 ENSG00000094914 AAAS                   4727 #> # ℹ 27,200 more rows"},{"path":"/articles/census_dataset_presence.html","id":"close-the-census","dir":"Articles","previous_headings":"Identifying all genes measured in a dataset","what":"Close the census","title":"Genes measured in each cell (dataset presence matrix)","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_datasets.html","id":"fetching-the-datasets-table","dir":"Articles","previous_headings":"","what":"Fetching the datasets table","title":"Census Datasets example","text":"Census contains top-level data frame itemizing datasets contained therein. 
can read SOMADataFrame Arrow Table: R data frame: sum cell counts across datasets match number cells across SOMA experiments (human, mouse).","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. census_datasets <- census$get(\"census_info\")$get(\"datasets\")$read()$concat() print(census_datasets) #> Table #> 593 rows x 8 columns #> $soma_joinid <int64 not null> #> $collection_id <large_string not null> #> $collection_name <large_string not null> #> $collection_doi <large_string not null> #> $dataset_id <large_string not null> #> $dataset_title <large_string not null> #> $dataset_h5ad_path <large_string not null> #> $dataset_total_cell_count <int64 not null> census_datasets <- as.data.frame(census_datasets) print(census_datasets[, c(   \"dataset_id\",   \"dataset_title\",   \"dataset_total_cell_count\" )]) #> # A tibble: 593 × 3 #>    dataset_id                           dataset_title             dataset_total_cell_c…¹ #>    <chr>                                <chr>                                      <int> #>  1 f171db61-e57e-4535-a06a-35d8b6ef8f2b donor_p13_trophoblasts                     31497 #>  2 ecf2e08e-2032-4a9e-b466-b65b395f4a02 All donors trophoblasts                    67070 #>  3 74cff64f-9da9-4b2a-9b3b-8a04a1598040 All donors all cell stat…                 286326 #>  4 5af90777-6760-4003-9dba-8f945fec6fdf Single-cell transcriptom…                 270855 #>  5 bd65a70f-b274-4133-b9dd-0d1431b6af34 Single-cell sequencing l…                 167283 #>  6 ff45e623-7f5f-46e3-b47d-56be0341f66b Tabula Sapiens - Pancreas                  13497 #>  7 f01bdd17-4902-40f5-86e3-240d66dd2587 Tabula Sapiens - Salivar…                  27199 #>  8 e6a11140-2545-46bc-929e-da243eed2cae Tabula Sapiens - Heart                     11505 #>  9 e5c63d94-593c-4338-a489-e1048599e751 Tabula Sapiens - Bladder                   
24583 #> 10 d8732da6-8d1d-42d9-b625-f2416c30054b Tabula Sapiens - Trachea                    9522 #> # ℹ 583 more rows #> # ℹ abbreviated name: ¹​dataset_total_cell_count census_data <- census$get(\"census_data\") all_experiments <- lapply(census_data$to_list(), function(it) census_data$get(it$name)) print(all_experiments) #> $mus_musculus #> <SOMAExperiment> #>   uri: s3://cellxgene-data-public/cell-census/2023-07-25/soma/census_data/mus_musculus  #>   arrays: obs*  #>   groups: ms*  #>  #> $homo_sapiens #> <SOMAExperiment> #>   uri: s3://cellxgene-data-public/cell-census/2023-07-25/soma/census_data/homo_sapiens  #>   arrays: obs*  #>   groups: ms* experiments_total_cells <- sum(sapply(all_experiments, function(it) {   nrow(it$obs$read(column_names = c(\"soma_joinid\"))$concat()) })) print(paste(\"Found\", experiments_total_cells, \"cells in all experiments.\")) #> [1] \"Found 61656118 cells in all experiments.\" print(paste(   \"Found\", sum(as.vector(census_datasets$dataset_total_cell_count)),   \"cells in all datasets.\" )) #> [1] \"Found 61656118 cells in all datasets.\""},{"path":"/articles/census_datasets.html","id":"fetching-the-expression-data-from-a-single-dataset","dir":"Articles","previous_headings":"","what":"Fetching the expression data from a single dataset","title":"Census Datasets example","text":"Let’s pick one dataset slice census, turn Seurat -memory object. (requires Seurat package installed beforehand.) 
Create query mouse experiment, “RNA” measurement, dataset_id.","code":"census_datasets[census_datasets$dataset_id == \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\", ] #> # A tibble: 1 × 8 #>   soma_joinid collection_id      collection_name collection_doi dataset_id dataset_title #>         <int> <chr>              <chr>           <chr>          <chr>      <chr>         #> 1         522 0b9d8a04-bb9d-44d… Tabula Muris S… 10.1038/s4158… 0bd1a1de-… Bone marrow … #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count <int> obs_query <- tiledbsoma::SOMAAxisQuery$new(   value_filter = \"dataset_id == '0bd1a1de-3aee-40e0-b2ec-86c7a30c7149'\" ) expt_query <- tiledbsoma::SOMAExperimentAxisQuery$new(   census_data$get(\"mus_musculus\"), \"RNA\",   obs_query = obs_query ) dataset_seurat <- expt_query$to_seurat(c(counts = \"raw\")) #> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package, #> which was just loaded, will retire in October 2023. #> Please refer to R-spatial evolution reports for details, especially #> https://r-spatial.org/r/2023/05/15/evolution4.html. #> It may be desirable to make the sf package available; #> package maintainers should consider adding sf to Suggests:. #> The sp package is now running under evolution status 2 #>      (status 2 uses the sf package in place of rgdal) print(dataset_seurat) #> An object of class Seurat  #> 52392 features across 40220 samples within 1 assay  #> Active assay: RNA (52392 features, 0 variable features)"},{"path":"/articles/census_datasets.html","id":"downloading-the-original-source-h5ad-file-of-a-dataset","dir":"Articles","previous_headings":"","what":"Downloading the original source H5AD file of a dataset","title":"Census Datasets example","text":"can use cellxgene.census::get_source_h5ad_uri() API fetch URI pointing H5AD associated dataset_id. H5AD can download CZ CELLxGENE Discover, may contain additional data-submitter provided information included Census. 
can fetch location cloud directly download system. local H5AD file can used R using SeuratDisk’s anndata converter.","code":"# Option 1: Direct download cellxgene.census::download_source_h5ad(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\",   file = \"/tmp/Tabula_Muris_Senis-bone_marrow.h5ad\",   overwrite = TRUE ) # Option 2: Get location and download via preferred method cellxgene.census::get_source_h5ad_uri(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\") #> $uri #> [1] \"s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\""},{"path":"/articles/census_datasets.html","id":"close-the-census","dir":"Articles","previous_headings":"Downloading the original source H5AD file of a dataset","what":"Close the census","title":"Census Datasets example","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_query_extract.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/census_query_extract.html","id":"querying-cell-metadata-obs","dir":"Articles","previous_headings":"","what":"Querying cell metadata (obs)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census, RNA assays, located census$get(\"census_data\")$get(\"homo_sapiens\")$obs. SOMADataFrame can materialized R data frame (tibble) using .data.frame(obs$read()$concat()). mouse cell metadata census$get(\"census_data\")$get(\"mus_musculus\").obs. slicing cell metadata two relevant arguments can passed read(): column_names — character vector indicating metadata columns fetch. Expressions one comparisons Comparisons one <column> <op> <value> <column> <op> <column> Expressions can combine comparisons using && || op one < | > | <= | >= | == | != %% learn metadata columns available fetching filtering can directly look keys cell metadata. soma_joinid special SOMADataFrame column used join operations. definition columns can found Census schema. can used fetch specific columns specific rows matching condition. latter need know values looking priori. example let’s see possible values available sex. can load cell metadata fetching column sex. can see three different values sex, \"male\", \"female\" \"unknown\". information can fetch cell metatadata specific sex value, example \"unknown\". can use column_names value_filter perform specific queries. 
example let’s fetch disease column cell_type \"B cell\" tissue_general \"lung\".","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\" unique(as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = \"sex\")$concat())) #> # A tibble: 3 × 1 #>   sex     #>   <chr>   #> 1 unknown #> 2 female  #> 3 male as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(value_filter = \"sex == 'unknown'\")$concat()) #> # A tibble: 3,251,334 × 21 #>    soma_joinid dataset_id  assay assay_ontology_term_id cell_type cell_type_ontology_t…¹ #>          <int> <chr>       <chr> <chr>                  <chr>     <chr>                  #>  1           0 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  2           1 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #>  3           2 
f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  4           3 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  5           4 f171db61-e… 10x … EFO:0009922            extravil… CL:0008036             #>  6           5 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #>  7           6 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  8           7 f171db61-e… 10x … EFO:0009922            extravil… CL:0008036             #>  9           8 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #> 10           9 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #> # ℹ 3,251,324 more rows #> # ℹ abbreviated name: ¹​cell_type_ontology_term_id #> # ℹ 15 more variables: development_stage <chr>, #> #   development_stage_ontology_term_id <chr>, disease <chr>, #> #   disease_ontology_term_id <chr>, donor_id <chr>, is_primary_data <lgl>, #> #   self_reported_ethnicity <chr>, self_reported_ethnicity_ontology_term_id <chr>, #> #   sex <chr>, sex_ontology_term_id <chr>, suspension_type <chr>, tissue <chr>, … cell_metadata_b_cell <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     value_filter = \"cell_type == 'B cell' & tissue_general == 'lung'\",     column_names = \"disease\"   )$concat() ) table(cell_metadata_b_cell) #> disease #> chronic obstructive pulmonary disease                              COVID-19  #>                                  6369                                  2729  #>          hypersensitivity pneumonitis             interstitial lung disease  #>                                    52                                   376  #>                   lung adenocarcinoma             lung large cell carcinoma  #>                                 62351                                  1534  #>              lymphangioleiomyomatosis         non-small cell lung carcinoma  #>                
                   133                                 17484  #>   non-specific interstitial pneumonia                                normal  #>                                   231                                 25461  #>                 pleomorphic carcinoma                             pneumonia  #>                                  1210                                    50  #>                   pulmonary emphysema                    pulmonary fibrosis  #>                                  1512                                  6798  #>                 pulmonary sarcoidosis             small cell lung carcinoma  #>                                     6                                   583  #>          squamous cell lung carcinoma  #>                                 11920"},{"path":"/articles/census_query_extract.html","id":"querying-gene-metadata-var","dir":"Articles","previous_headings":"","what":"Querying gene metadata (var)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census located census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var. Similarly cell metadata, SOMADataFrame thus can also use method read(). mouse gene metadata census$get(\"census_data\")$get(\"mus_musculus\")$ms$get(\"RNA\")$var. Let’s take look metadata available column selection row filtering. exception soma_joinid columns defined Census schema. Similarly cell metadata, can use operations learn fetch gene metadata. 
example, get feature_name feature_length genes \"ENSG00000161798\" \"ENSG00000188229\" can following.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\" as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$read(     value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",     column_names = c(\"feature_name\", \"feature_length\")   )$concat() ) #> # A tibble: 2 × 2 #>   feature_name feature_length #>   <chr>                 <int> #> 1 AQP5                   1884 #> 2 TUBB4B                 2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-seurat","dir":"Articles","previous_headings":"","what":"Querying expression data as Seurat","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"convenient way query fetch expression data use get_seurat method cellxgene.census API. method combines column selection value filtering described obtain slices expression data based metadata queries. method return Seurat object, takes input census object, string organism, cell gene metadata can specify filters column selection described following arguments: obs_column_names — character vector indicating columns select cell metadata. obs_value_filter — expression selection conditions fetch cells meeting criteria. var_column_names — character vector indicating columns select gene metadata. var_value_filter — expression selection conditions fetch genes meeting criteria. example want fetch expression data : Genes \"ENSG00000161798\" \"ENSG00000188229\". \"B cells\" \"lung\" \"COVID-19\". gene metadata adding sex cell metadata. 
full description refer ?cellxgene.census::get_seurat.","code":"library(\"Seurat\")  seurat_obj <- cellxgene.census::get_seurat(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) seurat_obj #> An object of class Seurat  #> 2 features across 2729 samples within 1 assay  #> Active assay: RNA (2 features, 0 variable features) head(seurat_obj[[]]) #>                orig.ident nCount_RNA nFeature_RNA cell_type tissue_general  disease #> cell5946018 SeuratProject          0            0    B cell           lung COVID-19 #> cell5948526 SeuratProject          1            1    B cell           lung COVID-19 #> cell5949180 SeuratProject          0            0    B cell           lung COVID-19 #> cell5949686 SeuratProject          0            0    B cell           lung COVID-19 #> cell5950730 SeuratProject          0            0    B cell           lung COVID-19 #> cell5952197 SeuratProject          0            0    B cell           lung COVID-19 #>                 sex #> cell5946018    male #> cell5948526 unknown #> cell5949180    male #> cell5949686 unknown #> cell5950730    male #> cell5952197 unknown head(seurat_obj$RNA[[]]) #>                 feature_name feature_length #> ENSG00000161798         AQP5           1884 #> ENSG00000188229       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-singlecellexperiment","dir":"Articles","previous_headings":"","what":"Querying expression data as SingleCellExperiment","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"Similarly previous section, get_single_cell_experiment method cellxgene.census API. behaves exactly get_seurat returns SingleCellExperiment object. example, repeat query can simply following. 
full description refer ?cellxgene.census::get_single_cell_experiment.","code":"library(\"SingleCellExperiment\")  sce_obj <- cellxgene.census::get_single_cell_experiment(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) sce_obj #> class: SingleCellExperiment  #> dim: 2 2729  #> metadata(0): #> assays(1): counts #> rownames(2): ENSG00000161798 ENSG00000188229 #> rowData names(2): feature_name feature_length #> colnames(2729): obs5946018 obs5948526 ... obs48038140 obs48038164 #> colData names(4): cell_type tissue_general disease sex #> reducedDimNames(0): #> mainExpName: RNA #> altExpNames(0): head(colData(sce_obj)) #> DataFrame with 6 rows and 4 columns #>              cell_type tissue_general     disease         sex #>            <character>    <character> <character> <character> #> obs5946018      B cell           lung    COVID-19        male #> obs5948526      B cell           lung    COVID-19     unknown #> obs5949180      B cell           lung    COVID-19        male #> obs5949686      B cell           lung    COVID-19     unknown #> obs5950730      B cell           lung    COVID-19        male #> obs5952197      B cell           lung    COVID-19     unknown head(rowData(sce_obj)) #> DataFrame with 2 rows and 2 columns #>                 feature_name feature_length #>                  <character>      <integer> #> ENSG00000161798         AQP5           1884 #> ENSG00000188229       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"close-the-census","dir":"Articles","previous_headings":"Querying expression data as SingleCellExperiment","what":"Close the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"use, census object closed release memory resources. 
also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/comp_bio_census_info.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Learning about the CZ CELLxGENE Census","text":"cellxgene.census R package contains convenient open_soma() API open version Census (stable default). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/comp_bio_census_info.html","id":"census-organization","dir":"Articles","previous_headings":"","what":"Census organization","title":"Learning about the CZ CELLxGENE Census","text":"Census schema defines structure Census. short, can think Census structured collection items stores different pieces information. items parent collection SOMA objects various types can accessed TileDB-SOMA API (documentation). cellxgene.census package contains convenient wrappers TileDB-SOMA API. example function used open Census: cellxgene_census.open_soma().","code":""},{"path":"/articles/comp_bio_census_info.html","id":"main-census-components","dir":"Articles","previous_headings":"Census organization","what":"Main Census components","title":"Learning about the CZ CELLxGENE Census","text":"command created census, SOMACollection, R6 class providing key-value associative map. 
get() method can access two top-level collection members, census_info census_data, instances SOMACollection.","code":""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-info","dir":"Articles","previous_headings":"Census organization","what":"Census summary info","title":"Learning about the CZ CELLxGENE Census","text":"census$get(\"census_info\")$get(\"summary\"): data frame high-level information Census, e.g. build date, total cell count, etc. census$get(\"census_info\")$get(\"datasets\"): data frame datasets CELLxGENE Discover used create Census. census$get(\"census_info\")$get(\"summary_cell_counts\"): data frame cell counts stratified relevant cell metadata Census data Data organism stored independent SOMAExperiment objects specialized form SOMACollection. store data matrix (cell genes), cell metadata, gene metadata, useful components covered notebook. data organized one organism – Homo sapiens: census$get(\"census_data\")$get(\"homo_sapiens\")$obs: Cell metadata census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\"): Data matrices, currently … census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$X$get(\"raw\"): matrix raw counts SOMASparseNDArray census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var: Gene Metadata","code":""},{"path":"/articles/comp_bio_census_info.html","id":"cell-metadata","dir":"Articles","previous_headings":"","what":"Cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"can obtain cell metadata variables directly querying columns corresponding SOMADataFrame. variables can used querying Census case want work specific cells. variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. dataset_id: dataset id encoded census$get(\"census_info\")$get(\"datasets\"). 
tissue_general tissue_general_ontology_term_id: high-level tissue mapping.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\""},{"path":"/articles/comp_bio_census_info.html","id":"gene-metadata","dir":"Articles","previous_headings":"","what":"Gene metadata","title":"Learning about the CZ CELLxGENE Census","text":"Similarly, can obtain gene metadata variables directly querying columns corresponding SOMADataFrame. variables can use querying Census case specific genes interested . variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. 
feature_length: length base pairs gene.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-content-tables","dir":"Articles","previous_headings":"","what":"Census summary content tables","title":"Learning about the CZ CELLxGENE Census","text":"can take quick look high-level Census information looking census$get(\"census_info\")$get(\"summary\"): special interest label-value combinations : total_cell_count total number cells Census. unique_cell_count number unique cells, cells may present twice due meta-analysis consortia-like data. number_donors_homo_sapiens number_donors_mus_musculus number individuals human mouse. guaranteed unique one individual ID may present identical different datasets.","code":"as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #> # A tibble: 7 × 3 #>   soma_joinid label                      value      #>         <int> <chr>                      <chr>      #> 1           0 census_schema_version      1.0.0      #> 2           1 census_build_date          2023-07-25 #> 3           2 dataset_schema_version     3.0.0      #> 4           3 total_cell_count           61656118   #> 5           4 unique_cell_count          37447773   #> 6           5 number_donors_homo_sapiens 13035      #> 7           6 number_donors_mus_musculus 1417"},{"path":"/articles/comp_bio_census_info.html","id":"cell-counts-by-cell-metadata","dir":"Articles","previous_headings":"Census summary content tables","what":"Cell counts by cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"looking census$get(\"census_info)$get(\"summary_cell_counts\") can get general idea cell counts stratified relevant cell metadata. cell metadata included table, can take look cell gene metadata available sections “Cell metadata” “Gene metadata”. 
line retrieves table casts R data frame: combination organism values category cell metadata can take look total_cell_count unique_cell_count cell counts combination. values category specified ontology_term_id label, value’s IDs labels, respectively.","code":"census_counts <- as.data.frame(census$get(\"census_info\")$get(\"summary_cell_counts\")$read()$concat()) head(census_counts) #> # A tibble: 6 × 7 #>   soma_joinid organism     category ontology_term_id unique_cell_count total_cell_count #>         <int> <chr>        <chr>    <chr>                        <int>            <int> #> 1           0 Homo sapiens all      na                        33364242         56400873 #> 2           1 Homo sapiens assay    EFO:0008722                 264166           279635 #> 3           2 Homo sapiens assay    EFO:0008780                  25652            51304 #> 4           3 Homo sapiens assay    EFO:0008919                  89477           206754 #> 5           4 Homo sapiens assay    EFO:0008931                  78750           188248 #> 6           5 Homo sapiens assay    EFO:0008953                   4693             9386 #> # ℹ 1 more variable: label <chr>"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-metadata-included-in-the-summary-counts-table","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell metadata included in the summary counts table","title":"Learning about the CZ CELLxGENE Census","text":"get available cell metadata summary counts table can following. 
Remember cell metadata available, variables omitted creation table.","code":"t(table(census_counts$organism, census_counts$category)) #>                           #>                           Homo sapiens Mus musculus #>   all                                1            1 #>   assay                             19            9 #>   cell_type                        613          248 #>   disease                           64            5 #>   self_reported_ethnicity           26            1 #>   sex                                3            3 #>   suspension_type                    1            1 #>   tissue                           220           66 #>   tissue_general                    54           27"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-counts-for-each-sequencing-assay-in-human-data","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell counts for each sequencing assay in human data","title":"Learning about the CZ CELLxGENE Census","text":"get cell counts sequencing assay type human data, can perform following operations:","code":"human_assay_counts <- census_counts[census_counts$organism == \"Homo sapiens\" & census_counts$category == \"assay\", ] human_assay_counts <- human_assay_counts[order(human_assay_counts$total_cell_count, decreasing = TRUE), ]"},{"path":"/articles/comp_bio_census_info.html","id":"example-number-of-microglial-cells-in-the-census","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: number of microglial cells in the Census","title":"Learning about the CZ CELLxGENE Census","text":"specific term categories shown can directly find number cells term.","code":"census_counts[census_counts$label == \"microglial cell\", ] #> # A tibble: 2 × 7 #>   soma_joinid organism     category  ontology_term_id unique_cell_count total_cell_count #>         <int> <chr>        <chr>     <chr>           
             <int>            <int> #> 1          69 Homo sapiens cell_type CL:0000129                  268114           370771 #> 2        1038 Mus musculus cell_type CL:0000129                   48998            62617 #> # ℹ 1 more variable: label <chr>"},{"path":"/articles/comp_bio_census_info.html","id":"understanding-census-contents-beyond-the-summary-tables","dir":"Articles","previous_headings":"","what":"Understanding Census contents beyond the summary tables","title":"Learning about the CZ CELLxGENE Census","text":"using pre-computed tables census$get(\"census_info\") easy quick way understand contents Census, falls short want learn certain slices Census. example, may want learn : cell types available human liver? total number cells lung datasets stratified sequencing technology? sex distribution cells brain mouse? diseases available T cells? questions can answered directly querying cell metadata shown examples .","code":""},{"path":"/articles/comp_bio_census_info.html","id":"example-all-cell-types-available-in-human","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: all cell types available in human","title":"Learning about the CZ CELLxGENE Census","text":"exemplify process accessing slicing cell metadata summary stats, let’s start trivial example take look human cell types available Census: number rows total number cells humans. Now, wish get cell counts per cell type can work data frame. addition, focus cells marked is_primary_data=TRUE ensures de-duplicate cells appear CELLxGENE Discover. number unique cells. Now let’s look counts per cell type: shows abundant cell types “glutamatergic neuron”, “CD8-positive, alpha-beta T cell”, “CD4-positive, alpha-beta T cell”. Now let’s take look number unique cell types: total number different cell types human. information example can quickly obtained summary table census$get(\"census-info\")$get(\"summary_cell_counts\"). 
examples complex can achieved accessing cell metadata.","code":"as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = c(\"cell_type\", \"is_primary_data\")   )$concat() ) #> # A tibble: 56,400,873 × 2 #>    cell_type                     is_primary_data #>    <chr>                         <lgl>           #>  1 syncytiotrophoblast cell      FALSE           #>  2 placental villous trophoblast FALSE           #>  3 syncytiotrophoblast cell      FALSE           #>  4 syncytiotrophoblast cell      FALSE           #>  5 extravillous trophoblast      FALSE           #>  6 placental villous trophoblast FALSE           #>  7 syncytiotrophoblast cell      FALSE           #>  8 extravillous trophoblast      FALSE           #>  9 placental villous trophoblast FALSE           #> 10 syncytiotrophoblast cell      FALSE           #> # ℹ 56,400,863 more rows human_cell_types <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = \"cell_type\",     value_filter = \"is_primary_data == TRUE\"   )$concat() )$cell_type length(human_cell_types) #> [1] 33364242 human_cell_type_counts <- table(human_cell_types) sort(human_cell_type_counts, decreasing = TRUE)[1:10] #> human_cell_types #>                          neuron            glutamatergic neuron  #>                         2673669                         1541605  #> CD4-positive, alpha-beta T cell CD8-positive, alpha-beta T cell  #>                         1258976                         1235987  #>              classical monocyte                          B cell  #>                         1030996                          908651  #>                     native cell             natural killer cell  #>                          889262                          768755  #>                      macrophage                 oligodendrocyte  #>                          721687                          710242 dim(human_cell_type_counts) #> [1] 
599"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-types-available-in-human-liver","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: cell types available in human liver","title":"Learning about the CZ CELLxGENE Census","text":"Similar example , can learn cell types available specific tissue, e.g. liver. achieve goal just need limit cell metadata tissue. use information cell metadata variable tissue_general. variable contains high-level tissue label cells Census: cell types cell counts human liver.","code":"human_liver_cell_types <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = \"cell_type\",     value_filter = \"is_primary_data == TRUE && tissue_general == 'liver'\"   )$concat() )$cell_type sort(table(human_liver_cell_types), decreasing = TRUE)[1:10] #> human_liver_cell_types #>                          T cell                     hepatoblast  #>                           85739                           58447  #>                 neoplastic cell                    erythroblast  #>                           52431                           45605  #>                        monocyte                      hepatocyte  #>                           31388                           28309  #>             natural killer cell    periportal region hepatocyte  #>                           26871                           23509  #>                      macrophage centrilobular region hepatocyte  #>                           16707                           15819"},{"path":"/articles/comp_bio_census_info.html","id":"example-diseased-t-cells-in-human-tissues","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: diseased T cells in human tissues","title":"Learning about the CZ CELLxGENE Census","text":"example going get counts diseased cells annotated T cells. 
sake example focus “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”: cell counts annotated indicated disease across human tissues “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”.","code":"t_cells_diseased <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = c(\"disease\", \"tissue_general\"),     value_filter = \"is_primary_data == TRUE && disease != 'normal' && cell_type %in% c('CD8-positive, alpha-beta T cell', 'CD4-positive, alpha-beta T cell')\"   )$concat() ) print(table(t_cells_diseased)) #>                                        tissue_general #> disease                                 adipose tissue adrenal gland  blood blood clot #>   B-cell non-Hodgkin lymphoma                        0             0  62499          0 #>   breast cancer                                      0             0      0          0 #>   chronic obstructive pulmonary disease              0             0      0          0 #>   chronic rhinitis                                   0             0      0          0 #>   clear cell renal carcinoma                         0             0   6548          0 #>   COVID-19                                           0             0 819428          0 #>   Crohn disease                                      0             0      0          0 #>   cystic fibrosis                                    0             0      0          0 #>   Down syndrome                                      0             0      0          0 #>   follicular lymphoma                                0             0      0          0 #>   influenza                                          0             0   8871          0 #>   interstitial lung disease                          0             0      0          0 #>   kidney benign neoplasm                             0             0     20          0 #>   kidney oncocytoma                                  0             0     16          0 #>   
lung adenocarcinoma                                0           205      0          0 #>                                        tissue_general #> disease                                 bone marrow  brain breast  colon kidney  liver #>   B-cell non-Hodgkin lymphoma                     0      0      0      0      0      0 #>   breast cancer                                   0      0   1850      0      0      0 #>   chronic obstructive pulmonary disease           0      0      0      0      0      0 #>   chronic rhinitis                                0      0      0      0      0      0 #>   clear cell renal carcinoma                      0      0      0      0  20540      0 #>   COVID-19                                        0      0      0      0      0      0 #>   Crohn disease                                   0      0      0  17490      0      0 #>   cystic fibrosis                                 0      0      0      0      0      0 #>   Down syndrome                                 181      0      0      0      0      0 #>   follicular lymphoma                             0      0      0      0      0      0 #>   influenza                                       0      0      0      0      0      0 #>   interstitial lung disease                       0      0      0      0      0      0 #>   kidney benign neoplasm                          0      0      0      0     10      0 #>   kidney oncocytoma                               0      0      0      0   2408      0 #>   lung adenocarcinoma                             0   3274      0      0      0    507 #>                                        tissue_general #> disease                                   lung lymph node   nose pleural fluid #>   B-cell non-Hodgkin lymphoma                0          0      0             0 #>   breast cancer                              0          0      0             0 #>   chronic obstructive pulmonary disease   9382          0      0             0 #>   chronic rhinitis            
               0          0    909             0 #>   clear cell renal carcinoma                 0         36      0             0 #>   COVID-19                               30578          0     13             0 #>   Crohn disease                              0          0      0             0 #>   cystic fibrosis                            7          0      0             0 #>   Down syndrome                              0          0      0             0 #>   follicular lymphoma                        0       1089      0             0 #>   influenza                                  0          0      0             0 #>   interstitial lung disease               1803          0      0             0 #>   kidney benign neoplasm                     0          0      0             0 #>   kidney oncocytoma                          0          0      0             0 #>   lung adenocarcinoma                   215013      24969      0         11558 #>                                        tissue_general #> disease                                 respiratory system saliva small intestine #>   B-cell non-Hodgkin lymphoma                            0      0               0 #>   breast cancer                                          0      0               0 #>   chronic obstructive pulmonary disease                  0      0               0 #>   chronic rhinitis                                       0      0               0 #>   clear cell renal carcinoma                             0      0               0 #>   COVID-19                                               4     41               0 #>   Crohn disease                                          0      0           52029 #>   cystic fibrosis                                        0      0               0 #>   Down syndrome                                          0      0               0 #>   follicular lymphoma                                    0      0               0 #>   influenza                                           
   0      0               0 #>   interstitial lung disease                              0      0               0 #>   kidney benign neoplasm                                 0      0               0 #>   kidney oncocytoma                                      0      0               0 #>   lung adenocarcinoma                                    0      0               0 #>  [ reached getOption(\"max.print\") -- omitted 10 rows ]"},{"path":"/articles/comp_bio_data_integration.html","id":"finding-and-fetching-data-from-mouse-liver-10x-genomics-and-smart-seq2","dir":"Articles","previous_headings":"","what":"Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s load modules needed notebook. Now can open Census. notebook use Tabula Muris Senis data liver contains cells 10X Genomics Smart-Seq2 technologies. Let’s query datasets table Census filtering collection_name “Tabula Muris Senis” dataset_title “liver”. Now can use values dataset_id query load AnnData object cells datasets. can check cell counts 10X Genomics Smart-Seq2 data looking assay metadata.","code":"library(cellxgene.census) library(Seurat) library(patchwork) census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
census_datasets <- census$get(\"census_info\")$get(\"datasets\") census_datasets <- census_datasets$read(value_filter = \"collection_name == 'Tabula Muris Senis'\") census_datasets <- as.data.frame(census_datasets$concat())  # Print rows with liver data census_datasets[grep(\"Liver\", census_datasets$dataset_title), ] #> # A tibble: 2 × 8 #>   soma_joinid collection_id            collection_name collection_doi dataset_id #>         <int> <chr>                    <chr>           <chr>          <chr>      #> 1         525 0b9d8a04-bb9d-44da-aa27… Tabula Muris S… 10.1038/s4158… 4546e757-… #> 2         547 0b9d8a04-bb9d-44da-aa27… Tabula Muris S… 10.1038/s4158… 6202a243-… #> # ℹ 3 more variables: dataset_title <chr>, dataset_h5ad_path <chr>, #> #   dataset_total_cell_count <int> tabula_muris_liver_ids <- c(\"4546e757-34d0-4d17-be06-538318925fcd\", \"6202a243-b713-4e12-9ced-c387f8483dea\")  seurat_obj <- cellxgene.census::get_seurat(   census,   organism = \"Mus musculus\",   obs_value_filter = \"dataset_id %in% tabula_muris_liver_ids\" ) table(seurat_obj$assay) #>  #>  10x 3' v2 Smart-seq2  #>       7294       2859"},{"path":"/articles/comp_bio_data_integration.html","id":"gene-length-normalization-of-smart-seq2-data-","dir":"Articles","previous_headings":"","what":"Gene-length normalization of Smart-Seq2 data.","title":"Integrating multi-dataset slices of data with Seurat","text":"Smart-seq2 read counts normalized gene length. Lets first get gene lengths var.feature_length. Now can use normalize Smart-seq data. let’s split object assay. 
normalize Smart-seq slice using gene lengths merge back single object.","code":"smart_seq_gene_lengths <- seurat_obj[[\"RNA\"]]@meta.features$feature_length seurat_obj.list <- SplitObject(seurat_obj, split.by = \"assay\") seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts <- seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts / smart_seq_gene_lengths seurat_obj <- merge(seurat_obj.list[[1]], seurat_obj.list[[2]])"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-with-seurat","dir":"Articles","previous_headings":"","what":"Integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"use native integration capabilities Seurat. comprehensive usage best practices Seurat integration please refer doc site Seurat.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"inspecting-data-prior-to-integration","dir":"Articles","previous_headings":"Integration with Seurat","what":"Inspecting data prior to integration","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s take look strength batch effects data. perform embedding visualization via UMAP. Let’s basic data normalization variable gene selection now perform PCA UMAP   can see batch effects strong cells cluster primarily assay cell_type. 
Properly integrated embedding principle cluster primarily cell_type, assay best randomly distributed.","code":"seurat_obj <- SCTransform(seurat_obj) seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = \"vst\", nfeatures = 2000) seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj)) seurat_obj <- RunUMAP(seurat_obj, dims = 1:10) # By assay p1 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"data-integration-with-seurat","dir":"Articles","previous_headings":"Integration with Seurat","what":"Data integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"Whenever query fetch Census data multiple datasets integration needs performed evidenced batch effects observed. parameters Seurat used notebook selected model run quickly. best practices integration single-cell data using Seurat please refer documentation page. seurat_d reading article integrated cell atlas human lung health disease Sikkema et al. performed integration 43 datasets Lung. focus metadata Census can batch information integration.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id","title":"Integrating multi-dataset slices of data with Seurat","text":"cells Census annotated dataset come \"dataset_id\". great place start integration. let’s run Seurat integration pipeline. First define model batch set dataset_id. First normalize select variable genes separated batch key dataset_id Now perform integration. Let’s inspect results normalization UMAP visualization. plot UMAP.   Great! can see clustering longer mainly driven assay, albeit still contributing . 
Great! can see clustering longer mainly driven assay, albeit still contributing .","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- ScaleData(seurat_obj.combined, verbose = FALSE) seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id","title":"Integrating multi-dataset slices of data with Seurat","text":"Similar dataset_id, cells Census annotated donor_id. definition donor_id depends dataset left discretion data curators. However still rich information can used batch variable integration. 
donor_id guaranteed unique across cells Census, strongly recommend concatenating dataset_id donor_id use batch separator Seurat Now perform integration. inspect new results UMAP. Plot UMAP.   can see using dataset_id donor_id batch cells now mostly cluster cell type.","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = \"donor_id\") })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") #> Finding all pairwise anchors #> Running CCA #> Merging objects #> Finding neighborhoods #> Finding anchors #>  Found 7026 anchors #> Filtering anchors #>  Retained 4880 anchors seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") #> Merging dataset 1 into 2 #> Extracting anchors for merged samples #> Finding integration vectors #> Finding integration vector weights #> Integrating data DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) #> 13:02:30 UMAP embedding parameters a = 0.9922 b = 1.112 #> 13:02:30 Read 10153 rows and found 30 numeric columns #> 13:02:30 Using Annoy for neighbor search, n_neighbors = 30 #> 13:02:30 Building Annoy index with metric = cosine, n_trees = 50 #> 0%   10   20   30   40   50   60   70   80   90   100% #> 
[----|----|----|----|----|----|----|----|----|----| #> **************************************************| #> 13:02:32 Writing NN index file to temp file /var/folders/9q/j_1zpqns7vz_s5n9l6_b943r0000gr/T//Rtmpfho1WF/file7d5027ced41d #> 13:02:32 Searching Annoy index using 1 thread, search_k = 3000 #> 13:02:38 Annoy recall = 100% #> 13:02:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 #> 13:02:39 Initializing from normalized Laplacian + noise (using irlba) #> 13:02:40 Commencing optimization for 200 epochs, with 410804 positive edges #> 13:02:49 Optimization finished # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id-assay_ontology_term_id-suspension_type-","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id + assay_ontology_term_id + suspension_type.","title":"Integrating multi-dataset slices of data with Seurat","text":"cases one dataset may contain multiple assay types /multiple suspension types (cell vs nucleus), important consider metadata batches. Therefore, comprehensive definition batch Census can accomplished combining cell metadata dataset_id, donor_id, assay_ontology_term_id suspension_type, latter encode EFO ids assay types. example, two datasets used contain cells one assay , one suspension type . Thus make difference include metadata part batch. implementation look line","code":"# EXAMPLE, DON'T RUN.  
# split the dataset into a list of seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = c(\"donor_id\", \"assay_ontology_term_id\", \"suspension_type\")) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list)  # integrate seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\")"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Summarizing cell and gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). open Census, close census$close(). can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma(). can learn cellxgene.census methods accessing corresponding documentation. example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"summarizing-cell-metadata","dir":"Articles","previous_headings":"","what":"Summarizing cell metadata","title":"Summarizing cell and gene metadata","text":"Census open can use TileDB-SOMA methods SOMACollection. can thus access metadata SOMADataFrame objects encoding cell gene metadata. Tips: can read entire SOMADataFrame R using .data.frame(soma_df$read()$concat()). 
Queries much faster request DataFrame columns required analysis (e.g. column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\")). can also refine query results using value_filter, filter census matching records.","code":""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-all-cell-types","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize all cell types","title":"Summarizing cell and gene metadata","text":"example reads cell metadata (obs) R data frame summarize variety ways.","code":"human <- census$get(\"census_data\")$get(\"homo_sapiens\")  # Read obs into an R data frame (tibble). obs_df <- as.data.frame(human$obs$read(   column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\") )$concat())  # Find all unique values in the cell_type_ontology_term_id column. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id)  cat(paste(   \"There are\",   length(unique_cell_type_ontology_term_id),   \"cell types in the Census! The first few are:\" )) #> There are 613 cell types in the Census! The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0000525\" \"CL:2000060\" \"CL:0008036\" \"CL:0002488\" \"CL:0002343\" \"CL:0000084\""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-a-subset-of-cell-types-selected-with-a-value_filter","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize a subset of cell types, selected with a value_filter","title":"Summarizing cell and gene metadata","text":"example utilizes SOMA “value filter” read subset cells tissue_ontology_term_id equal UBERON:0002048 (lung tissue), summarizes query result. can also define much complex value filters. 
example: combine terms use %% operator query multiple values","code":"# Read cell_type terms for cells which have a specific tissue term LUNG_TISSUE <- \"UBERON:0002048\"  obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = paste(\"tissue_ontology_term_id == '\", LUNG_TISSUE, \"'\", sep = \"\") )$concat())  # Find all unique values in the cell_type_ontology_term_id column as an R data frame. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id) cat(paste(   \"There are \",   length(unique_cell_type_ontology_term_id),   \" cell types in the Census where tissue_ontology_term_id == \",   LUNG_TISSUE,   \"!\\nThe first few are:\",   sep = \"\" )) #> There are 185 cell types in the Census where tissue_ontology_term_id == UBERON:0002048! #> The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0002063\" \"CL:0000775\" \"CL:0001044\" \"CL:0001050\" \"CL:0000814\" \"CL:0000071\"  # Report the 10 most common top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] cat(paste(\"The top 10 cell types where tissue_ontology_term_id ==\", LUNG_TISSUE)) #> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048 print(top_10) #>  #> CL:0000003 CL:0000583 CL:0000625 CL:0000624 CL:0000235 CL:0002063 CL:0000860 CL:0000623  #>     562038     526859     323985     323610     266333     255425     205013     164944  #> CL:0001064 CL:0002632  #>     149067     132243 # You can also do more complex queries, such as testing for inclusion in a list of values obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = \"tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:OOO2084', 'UBERON:0002080')\" )$concat())  # Summarize top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] print(top_10) #>  #> CL:0000746 CL:0008034 CL:0002548 CL:0000115 CL:0002131 CL:0000763 CL:0000669 
CL:0000003  #>     159096      84750      79618      64190      61830      32088      27515      22707  #> CL:0000057 CL:0002144  #>      20117      18593"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"full-census-metadata-stats","dir":"Articles","previous_headings":"","what":"Full Census metadata stats","title":"Summarizing cell and gene metadata","text":"example queries organisms Census, summarizes diversity various metadata labels.","code":"cols_to_query <- c(   \"cell_type_ontology_term_id\",   \"assay_ontology_term_id\",   \"tissue_ontology_term_id\" )  total_cells <- 0 for (organism in census$get(\"census_data\")$names()) {   print(organism)   obs_df <- as.data.frame(     census$get(\"census_data\")$get(organism)$obs$read(column_names = cols_to_query)$concat()   )   total_cells <- total_cells + nrow(obs_df)   for (col in cols_to_query) {     cat(paste(\"  Unique \", col, \" values: \", length(unique(obs_df[[col]])), \"\\n\", sep = \"\"))   } } #> [1] \"mus_musculus\" #>   Unique cell_type_ontology_term_id values: 248 #>   Unique assay_ontology_term_id values: 9 #>   Unique tissue_ontology_term_id values: 66 #> [1] \"homo_sapiens\" #>   Unique cell_type_ontology_term_id values: 613 #>   Unique assay_ontology_term_id values: 19 #>   Unique tissue_ontology_term_id values: 220 cat(paste(\"Complete Census contains\", total_cells, \"cells.\")) #> Complete Census contains 61656118 cells."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"close-the-census","dir":"Articles","previous_headings":"Full Census metadata stats","what":"Close the census","title":"Summarizing cell and gene metadata","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. 
Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Chan Zuckerberg Initiative. Author, maintainer, copyright holder, funder.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Chan Zuckerberg Initiative (2023). cellxgene.census: CZ CELLxGENE Discover Cell Census. R package version 0.0.0.9000, https://github.com/chanzuckerberg/cellxgene-census.","code":"@Manual{,   title = {cellxgene.census: CZ CELLxGENE Discover Cell Census},   author = {{Chan Zuckerberg Initiative}},   year = {2023},   note = {R package version 0.0.0.9000},   url = {https://github.com/chanzuckerberg/cellxgene-census}, }"},{"path":"/index.html","id":"cz-cellxgene-discover-census","dir":"","previous_headings":"","what":"CZ CELLxGENE Discover Cell Census","title":"CZ CELLxGENE Discover Cell Census","text":"cellxgene.census package provides API facilitate use CZ CELLxGENE Discover Census. information API project visit chanzuckerberg/cellxgene-census GitHub repo. Status: Pre-release, rapid development. Expect API changes. 
Also see Python API.","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"CZ CELLxGENE Discover Cell Census","text":"can install development version cellxgene.census GitHub : (minimal apt dependencies: r-base cmake git)","code":"# install.packages(\"devtools\") devtools::install_github(\"chanzuckerberg/cellxgene-census/api/r/cellxgene.census\") print(cellxgene.census::open_soma())"},{"path":"/index.html","id":"example","dir":"","previous_headings":"","what":"Example","title":"CZ CELLxGENE Discover Cell Census","text":"basic example shows solve common problem:","code":"library(cellxgene.census) ## basic example code"},{"path":"/index.html","id":"for-more-help","dir":"","previous_headings":"","what":"For More Help","title":"CZ CELLxGENE Discover Cell Census","text":"help, please file issue repo, contact us soma@chanzuckerberg.com believe found security issue, appreciate notification. Please send email security@chanzuckerberg.com.","code":""},{"path":"/reference/download_source_h5ad.html","id":null,"dir":"Reference","previous_headings":"","what":"Download source H5AD to local file name. — download_source_h5ad","title":"Download source H5AD to local file name. — download_source_h5ad","text":"Download source H5AD local file name.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(   dataset_id,   file,   overwrite = FALSE,   census_version = \"stable\",   census = NULL )"},{"path":"/reference/download_source_h5ad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download source H5AD to local file name. — download_source_h5ad","text":"dataset_id dataset_id interest. file Local file name store H5AD file. overwrite TRUE allow overwriting existing file. 
census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling download_source_h5ad() multiple times.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(\"0895c838-e550-48a3-a777-dbcd35d30272\", \"/tmp/data.h5ad\", overwrite = TRUE)"},{"path":"/reference/get_census_version_description.html","id":null,"dir":"Reference","previous_headings":"","what":"Get release description for a Census version — get_census_version_description","title":"Get release description for a Census version — get_census_version_description","text":"Get release description Census version","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get release description for a Census version — get_census_version_description","text":"","code":"get_census_version_description(census_version)"},{"path":"/reference/get_census_version_description.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get release description for a Census version — get_census_version_description","text":"census_version census version name.","code":""},{"path":"/reference/get_census_version_description.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get release description for a Census version — get_census_version_description","text":"List release location metadata","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get release description for a Census version — get_census_version_description","text":"","code":"as.data.frame(get_census_version_description(\"stable\")) #>   
release_date release_build #> 1                 2023-07-25 #>                                                  soma.uri soma.s3_region #> 1 s3://cellxgene-data-public/cell-census/2023-07-25/soma/      us-west-2 #>                                                  h5ads.uri h5ads.s3_region #> 1 s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/       us-west-2 #>   do_not_delete  alias census_version #> 1          TRUE stable         stable"},{"path":"/reference/get_census_version_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the directory of Census releases currently available — get_census_version_directory","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Get directory Census releases currently available","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory()"},{"path":"/reference/get_census_version_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Data frame available cell census releases, including location metadata.","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory() #>            release_date release_build #> stable                     2023-07-25 #> latest                     2023-08-01 #> 2023-05-15                 2023-05-15 #> 2023-07-03                 2023-07-03 #> 2023-07-10                 2023-07-10 #> 2023-07-17                 2023-07-17 #> 2023-07-24      
           2023-07-24 #> 2023-07-25                 2023-07-25 #> 2023-08-01                 2023-08-01 #>                                                           soma.uri #> stable     s3://cellxgene-data-public/cell-census/2023-07-25/soma/ #> latest     s3://cellxgene-data-public/cell-census/2023-08-01/soma/ #> 2023-05-15 s3://cellxgene-data-public/cell-census/2023-05-15/soma/ #> 2023-07-03 s3://cellxgene-data-public/cell-census/2023-07-03/soma/ #> 2023-07-10 s3://cellxgene-data-public/cell-census/2023-07-10/soma/ #> 2023-07-17 s3://cellxgene-data-public/cell-census/2023-07-17/soma/ #> 2023-07-24 s3://cellxgene-data-public/cell-census/2023-07-24/soma/ #> 2023-07-25 s3://cellxgene-data-public/cell-census/2023-07-25/soma/ #> 2023-08-01 s3://cellxgene-data-public/cell-census/2023-08-01/soma/ #>            soma.s3_region #> stable          us-west-2 #> latest          us-west-2 #> 2023-05-15      us-west-2 #> 2023-07-03      us-west-2 #> 2023-07-10      us-west-2 #> 2023-07-17      us-west-2 #> 2023-07-24      us-west-2 #> 2023-07-25      us-west-2 #> 2023-08-01      us-west-2 #>                                                           h5ads.uri #> stable     s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/ #> latest     s3://cellxgene-data-public/cell-census/2023-08-01/h5ads/ #> 2023-05-15 s3://cellxgene-data-public/cell-census/2023-05-15/h5ads/ #> 2023-07-03 s3://cellxgene-data-public/cell-census/2023-07-03/h5ads/ #> 2023-07-10 s3://cellxgene-data-public/cell-census/2023-07-10/h5ads/ #> 2023-07-17 s3://cellxgene-data-public/cell-census/2023-07-17/h5ads/ #> 2023-07-24 s3://cellxgene-data-public/cell-census/2023-07-24/h5ads/ #> 2023-07-25 s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/ #> 2023-08-01 s3://cellxgene-data-public/cell-census/2023-08-01/h5ads/ #>            h5ads.s3_region do_not_delete  alias #> stable           us-west-2          TRUE stable #> latest           us-west-2         FALSE latest #> 2023-05-15       us-west-2          
TRUE        #> 2023-07-03       us-west-2            NA        #> 2023-07-10       us-west-2            NA        #> 2023-07-17       us-west-2            NA        #> 2023-07-24       us-west-2            NA        #> 2023-07-25       us-west-2          TRUE        #> 2023-08-01       us-west-2         FALSE"},{"path":"/reference/get_presence_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Read the feature dataset presence matrix. — get_presence_matrix","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"Read feature dataset presence matrix.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"get_presence_matrix(census, organism, measurement_name = \"RNA\")"},{"path":"/reference/get_presence_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"census census object cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA.","code":""},{"path":"/reference/get_presence_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"tiledbsoma::matrixZeroBasedView object dataset join id & feature join id dimensions, filled 1s indicating presence. sparse matrix accessed zero-based indexes since join id's may zero.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. on.exit(census$close(), add = TRUE) print(get_presence_matrix(census, \"Homo sapiens\")$dim()) #> Error in private$check_open_for_read_or_write(): Item must be open for read or write. s3://cellxgene-data-public/cell-census/2023-07-25/soma/"},{"path":"/reference/get_seurat.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to Seurat — get_seurat","title":"Export Census slices to Seurat — get_seurat","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return Seurat object.","code":""},{"path":"/reference/get_seurat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to Seurat — get_seurat","text":"","code":"get_seurat(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\", data = NULL),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_seurat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to Seurat — get_seurat","text":"census census object, usually returned cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers named character X layers add Seurat assay, names names Seurat slots (counts data) values names layers within X. obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. 
var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_seurat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to Seurat — get_seurat","text":"Seurat object containing sensus slice.","code":""},{"path":"/reference/get_seurat.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to Seurat — get_seurat","text":"","code":"if (FALSE) { census <- open_soma() seurat_obj <- get_seurat(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  seurat_obj  census$close() }"},{"path":"/reference/get_single_cell_experiment.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return SingleCellExperiment object.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"get_single_cell_experiment(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\"),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_single_cell_experiment.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"census census object, usually returned cellxgene.census::open_soma(). 
organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers character vector X layers add assays main experiment; may optionally named set name resulting assay (eg. ‘X_layers = c(counts = \"raw\")’ load X layer “‘raw’” assay “‘counts’”); default, loads X layers obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"SingleCellExperiment object containing sensus slice.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"if (FALSE) { census <- open_soma() sce_obj <- get_single_cell_experiment(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  sce_obj  census$close() }"},{"path":"/reference/get_source_h5ad_uri.html","id":null,"dir":"Reference","previous_headings":"","what":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","title":"Locate source h5ad file for a dataset. 
— get_source_h5ad_uri","text":"Locate source h5ad file dataset.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(dataset_id, census_version = \"stable\", census = NULL)"},{"path":"/reference/get_source_h5ad_uri.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"dataset_id dataset_id interest. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling get_source_h5ad_uri() multiple times.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"list uri optional s3_region.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(\"0895c838-e550-48a3-a777-dbcd35d30272\") #> $uri #> [1] \"s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/0895c838-e550-48a3-a777-dbcd35d30272.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\" #>"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":null,"dir":"Reference","previous_headings":"","what":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"Create SOMATileDBContext suitable using open_soma(). 
Typically open_soma() creates context automatically, one can created separately order set custom configuration options, share multiple open Census handles.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"new_SOMATileDBContext_for_census(census_version_description, ...)"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"census_version_description result get_census_version_description() desired Census version. ... Custom configuration options.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"SOMATileDBContext object open_soma().","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"census_desc <- get_census_version_description(\"stable\") ctx <- new_SOMATileDBContext_for_census(census_desc, \"soma.init_buffer_bytes\" = paste(4 * 1024**3)) census <- open_soma(\"stable\", tiledbsoma_ctx = ctx) #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
census$close()"},{"path":"/reference/open_soma.html","id":null,"dir":"Reference","previous_headings":"","what":"Open the Census — open_soma","title":"Open the Census — open_soma","text":"Open Census","code":""},{"path":"/reference/open_soma.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Open the Census — open_soma","text":"","code":"open_soma(census_version = \"stable\", uri = NULL, tiledbsoma_ctx = NULL)"},{"path":"/reference/open_soma.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Open the Census — open_soma","text":"census_version version Census, e.g., \"stable\". uri URI containing Census SOMA objects open instead released version. (supplied, takes precedence census_version.) tiledbsoma_ctx tiledbsoma::SOMATileDBContext built using new_SOMATileDBContext_for_census(). Optional (created automatically) using census_version context need reused.","code":""},{"path":"/reference/open_soma.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Open the Census — open_soma","text":"Top-level tiledbsoma::SOMACollection object. use, census closed release memory resources, usually .exit(census$close(), add = TRUE). Closing top-level census also close SOMA objects accessed .","code":""},{"path":"/reference/open_soma.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Open the Census — open_soma","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #> # A tibble: 7 × 3 #>   soma_joinid label                      value      #>         <int> <chr>                      <chr>      #> 1           0 census_schema_version      1.0.0      #> 2           1 census_build_date          2023-07-25 #> 3           2 dataset_schema_version     3.0.0      #> 4           3 total_cell_count           61656118   #> 5           4 unique_cell_count          37447773   #> 6           5 number_donors_homo_sapiens 13035      #> 7           6 number_donors_mus_musculus 1417       census$close()"}]
+[{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 Chan Zuckerberg Initiative Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"/articles/census_dataset_presence.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Genes measured in each cell (dataset presence matrix)","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version).","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-ids-of-the-census-datasets","dir":"Articles","previous_headings":"","what":"Fetching the IDs of the Census datasets","title":"Genes measured in each cell (dataset presence matrix)","text":"Let’s grab table datasets included Census use table combination presence matrix .","code":"# Grab the experiment containing human data, and the measurement therein with RNA human <- census$get(\"census_data\")$get(\"homo_sapiens\") human_rna <- human$ms$get(\"RNA\")  # The census-wide datasets datasets_df <- as.data.frame(census$get(\"census_info\")$get(\"datasets\")$read()$concat()) print(datasets_df) #> # A tibble: 593 × 8 #>    soma_joinid collection_id     collection_name collection_doi dataset_id dataset_title #>          <int> <chr>             <chr>           <chr>          <chr>      <chr>         #>  1           0 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… f171db61-… donor_p13_tr… #>  2           1 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… ecf2e08e-… All donors t… #>  3           2 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… 74cff64f-… All donors a… #>  4           3 f7cecffa-00b4-45… Mapping single… 10.1016/j.cce… 5af90777-… Single-cell … #>  5           4 3f50314f-bdc9-40… Single-cell se… 10.1016/j.cce… bd65a70f-… Single-cell … #>  6           5 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… ff45e623-… Tabula Sapie… #>  7           6 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… f01bdd17-… Tabula Sapie… #>  8           7 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e6a11140-… Tabula Sapie… #>  9           8 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e5c63d94-… Tabula Sapie… #> 10           9 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… d8732da6-… Tabula Sapie… #> # ℹ 583 more rows #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count 
<int>"},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-dataset-presence-matrix","dir":"Articles","previous_headings":"","what":"Fetching the dataset presence matrix","title":"Genes measured in each cell (dataset presence matrix)","text":"Now let’s fetch dataset presence matrix. convenience, read entire presence matrix (Homo sapiens) sparse matrix. convenience API providing capability: also need var dataframe, read R data frame convenient manipulation:","code":"presence_matrix <- cellxgene.census::get_presence_matrix(census, \"Homo sapiens\", \"RNA\") print(dim(presence_matrix)) #> NULL var_df <- as.data.frame(human_rna$var$read()$concat()) print(var_df) #> # A tibble: 60,664 × 4 #>    soma_joinid feature_id      feature_name feature_length #>          <int> <chr>           <chr>                 <int> #>  1           0 ENSG00000121410 A1BG                   3999 #>  2           1 ENSG00000268895 A1BG-AS1               3374 #>  3           2 ENSG00000148584 A1CF                   9603 #>  4           3 ENSG00000175899 A2M                    6318 #>  5           4 ENSG00000245105 A2M-AS1                2948 #>  6           5 ENSG00000166535 A2ML1                  7156 #>  7           6 ENSG00000256661 A2ML1-AS1               452 #>  8           7 ENSG00000184389 A3GALT2                1023 #>  9           8 ENSG00000128274 A4GALT                 3358 #> 10           9 ENSG00000118017 A4GNT                  1779 #> # ℹ 60,654 more rows"},{"path":"/articles/census_dataset_presence.html","id":"identifying-genes-measured-in-a-specific-dataset","dir":"Articles","previous_headings":"","what":"Identifying genes measured in a specific dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Now dataset table, genes metadata table, dataset presence matrix, can check gene set genes measured specific dataset. Important: presence matrix indexed soma_joinid, positionally indexed. 
words: first dimension presence matrix dataset’s soma_joinid, stored census_datasets dataframe. second dimension presence matrix feature’s soma_joinid, stored var dataframe. Let’s find gene \"ENSG00000286096\" measured dataset id \"97a17473-e2b1-4f31-a544-44a60773e2dd\".","code":"var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"] dataset_joinid <- datasets_df$soma_joinid[datasets_df$dataset_id == \"97a17473-e2b1-4f31-a544-44a60773e2dd\"] is_present <- presence_matrix$take(dataset_joinid, var_joinid)$get_one_based_matrix()[, , drop = TRUE] cat(paste(\"Feature is\", if (is_present) \"present.\" else \"not present.\")) #> Feature is present."},{"path":"/articles/census_dataset_presence.html","id":"identifying-datasets-that-measured-specific-genes","dir":"Articles","previous_headings":"","what":"Identifying datasets that measured specific genes","title":"Genes measured in each cell (dataset presence matrix)","text":"Similarly, can determine datasets measured specific gene set genes.","code":"# Grab the feature's soma_joinid from the var dataframe var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"]  # The presence matrix is indexed by the joinids of the dataset and var dataframes, # so slice out the feature of interest by its joinid. 
dataset_joinids <- datasets_df$soma_joinid[   presence_matrix$take(j = var_joinid)$get_one_based_matrix()[, , drop = TRUE] != 0 ]  # From the datasets dataframe, slice out the datasets which have a joinid in the list print(datasets_df[dataset_joinids, ]) #> # A tibble: 42 × 8 #>    soma_joinid collection_id     collection_name collection_doi dataset_id dataset_title #>          <int> <chr>             <chr>           <chr>          <chr>      <chr>         #>  1           4 3f50314f-bdc9-40… Single-cell se… 10.1016/j.cce… bd65a70f-… Single-cell … #>  2           5 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… ff45e623-… Tabula Sapie… #>  3           6 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… f01bdd17-… Tabula Sapie… #>  4           7 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e6a11140-… Tabula Sapie… #>  5           8 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e5c63d94-… Tabula Sapie… #>  6          10 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… d77ec7d6-… Tabula Sapie… #>  7          11 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… cee11228-… Tabula Sapie… #>  8          13 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… a2d4d33e-… Tabula Sapie… #>  9          14 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… a0754256-… Tabula Sapie… #> 10          18 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… 6d41668c-… Tabula Sapie… #> # ℹ 32 more rows #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count <int>"},{"path":"/articles/census_dataset_presence.html","id":"identifying-all-genes-measured-in-a-dataset","dir":"Articles","previous_headings":"","what":"Identifying all genes measured in a dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Finally, can find set genes measured cells given dataset.","code":"# Slice the dataset(s) of interest, and get the joinid(s) dataset_joinids <- datasets_df$soma_joinid[datasets_df$collection_id == \"17481d16-ee44-49e5-bcf0-28c0780d8c4a\"]  # Slice 
the presence matrix by the first dimension, i.e., by dataset var_joinids <- var_df$soma_joinid[   which(Matrix::colSums(presence_matrix$take(i = dataset_joinids)$get_one_based_matrix()[, , drop = TRUE]) > 0) ]  print(var_df[var_joinids, ]) #> # A tibble: 27,210 × 4 #>    soma_joinid feature_id      feature_name feature_length #>          <int> <chr>           <chr>                 <int> #>  1           0 ENSG00000121410 A1BG                   3999 #>  2           1 ENSG00000268895 A1BG-AS1               3374 #>  3           2 ENSG00000148584 A1CF                   9603 #>  4           3 ENSG00000175899 A2M                    6318 #>  5           4 ENSG00000245105 A2M-AS1                2948 #>  6           5 ENSG00000166535 A2ML1                  7156 #>  7           7 ENSG00000184389 A3GALT2                1023 #>  8           8 ENSG00000128274 A4GALT                 3358 #>  9           9 ENSG00000118017 A4GNT                  1779 #> 10          10 ENSG00000094914 AAAS                   4727 #> # ℹ 27,200 more rows"},{"path":"/articles/census_dataset_presence.html","id":"close-the-census","dir":"Articles","previous_headings":"Identifying all genes measured in a dataset","what":"Close the census","title":"Genes measured in each cell (dataset presence matrix)","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_datasets.html","id":"fetching-the-datasets-table","dir":"Articles","previous_headings":"","what":"Fetching the datasets table","title":"Census Datasets example","text":"Census contains top-level data frame itemizing datasets contained therein. 
can read SOMADataFrame Arrow Table: R data frame: sum cell counts across datasets match number cells across SOMA experiments (human, mouse).","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. census_datasets <- census$get(\"census_info\")$get(\"datasets\")$read()$concat() print(census_datasets) #> Table #> 593 rows x 8 columns #> $soma_joinid <int64 not null> #> $collection_id <large_string not null> #> $collection_name <large_string not null> #> $collection_doi <large_string not null> #> $dataset_id <large_string not null> #> $dataset_title <large_string not null> #> $dataset_h5ad_path <large_string not null> #> $dataset_total_cell_count <int64 not null> census_datasets <- as.data.frame(census_datasets) print(census_datasets[, c(   \"dataset_id\",   \"dataset_title\",   \"dataset_total_cell_count\" )]) #> # A tibble: 593 × 3 #>    dataset_id                           dataset_title             dataset_total_cell_c…¹ #>    <chr>                                <chr>                                      <int> #>  1 f171db61-e57e-4535-a06a-35d8b6ef8f2b donor_p13_trophoblasts                     31497 #>  2 ecf2e08e-2032-4a9e-b466-b65b395f4a02 All donors trophoblasts                    67070 #>  3 74cff64f-9da9-4b2a-9b3b-8a04a1598040 All donors all cell stat…                 286326 #>  4 5af90777-6760-4003-9dba-8f945fec6fdf Single-cell transcriptom…                 270855 #>  5 bd65a70f-b274-4133-b9dd-0d1431b6af34 Single-cell sequencing l…                 167283 #>  6 ff45e623-7f5f-46e3-b47d-56be0341f66b Tabula Sapiens - Pancreas                  13497 #>  7 f01bdd17-4902-40f5-86e3-240d66dd2587 Tabula Sapiens - Salivar…                  27199 #>  8 e6a11140-2545-46bc-929e-da243eed2cae Tabula Sapiens - Heart                     11505 #>  9 e5c63d94-593c-4338-a489-e1048599e751 Tabula Sapiens - Bladder                   
24583 #> 10 d8732da6-8d1d-42d9-b625-f2416c30054b Tabula Sapiens - Trachea                    9522 #> # ℹ 583 more rows #> # ℹ abbreviated name: ¹​dataset_total_cell_count census_data <- census$get(\"census_data\") all_experiments <- lapply(census_data$to_list(), function(it) census_data$get(it$name)) print(all_experiments) #> $mus_musculus #> <SOMAExperiment> #>   uri: s3://cellxgene-data-public/cell-census/2023-07-25/soma/census_data/mus_musculus  #>   arrays: obs*  #>   groups: ms*  #>  #> $homo_sapiens #> <SOMAExperiment> #>   uri: s3://cellxgene-data-public/cell-census/2023-07-25/soma/census_data/homo_sapiens  #>   arrays: obs*  #>   groups: ms* experiments_total_cells <- sum(sapply(all_experiments, function(it) {   nrow(it$obs$read(column_names = c(\"soma_joinid\"))$concat()) })) print(paste(\"Found\", experiments_total_cells, \"cells in all experiments.\")) #> [1] \"Found 61656118 cells in all experiments.\" print(paste(   \"Found\", sum(as.vector(census_datasets$dataset_total_cell_count)),   \"cells in all datasets.\" )) #> [1] \"Found 61656118 cells in all datasets.\""},{"path":"/articles/census_datasets.html","id":"fetching-the-expression-data-from-a-single-dataset","dir":"Articles","previous_headings":"","what":"Fetching the expression data from a single dataset","title":"Census Datasets example","text":"Let’s pick one dataset slice census, turn Seurat -memory object. (requires Seurat package installed beforehand.) 
Create query mouse experiment, “RNA” measurement, dataset_id.","code":"census_datasets[census_datasets$dataset_id == \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\", ] #> # A tibble: 1 × 8 #>   soma_joinid collection_id      collection_name collection_doi dataset_id dataset_title #>         <int> <chr>              <chr>           <chr>          <chr>      <chr>         #> 1         522 0b9d8a04-bb9d-44d… Tabula Muris S… 10.1038/s4158… 0bd1a1de-… Bone marrow … #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count <int> obs_query <- tiledbsoma::SOMAAxisQuery$new(   value_filter = \"dataset_id == '0bd1a1de-3aee-40e0-b2ec-86c7a30c7149'\" ) expt_query <- tiledbsoma::SOMAExperimentAxisQuery$new(   census_data$get(\"mus_musculus\"), \"RNA\",   obs_query = obs_query ) dataset_seurat <- expt_query$to_seurat(c(counts = \"raw\")) #> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package, #> which was just loaded, will retire in October 2023. #> Please refer to R-spatial evolution reports for details, especially #> https://r-spatial.org/r/2023/05/15/evolution4.html. #> It may be desirable to make the sf package available; #> package maintainers should consider adding sf to Suggests:. #> The sp package is now running under evolution status 2 #>      (status 2 uses the sf package in place of rgdal) print(dataset_seurat) #> An object of class Seurat  #> 52392 features across 40220 samples within 1 assay  #> Active assay: RNA (52392 features, 0 variable features)"},{"path":"/articles/census_datasets.html","id":"downloading-the-original-source-h5ad-file-of-a-dataset","dir":"Articles","previous_headings":"","what":"Downloading the original source H5AD file of a dataset","title":"Census Datasets example","text":"can use cellxgene.census::get_source_h5ad_uri() API fetch URI pointing H5AD associated dataset_id. H5AD can download CZ CELLxGENE Discover, may contain additional data-submitter provided information included Census. 
can fetch location cloud directly download system. local H5AD file can used R using SeuratDisk’s anndata converter.","code":"# Option 1: Direct download cellxgene.census::download_source_h5ad(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\",   file = \"/tmp/Tabula_Muris_Senis-bone_marrow.h5ad\",   overwrite = TRUE ) # Option 2: Get location and download via preferred method cellxgene.census::get_source_h5ad_uri(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\") #> $uri #> [1] \"s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\""},{"path":"/articles/census_datasets.html","id":"close-the-census","dir":"Articles","previous_headings":"Downloading the original source H5AD file of a dataset","what":"Close the census","title":"Census Datasets example","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_query_extract.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/census_query_extract.html","id":"querying-cell-metadata-obs","dir":"Articles","previous_headings":"","what":"Querying cell metadata (obs)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census, RNA assays, located census$get(\"census_data\")$get(\"homo_sapiens\")$obs. SOMADataFrame can materialized R data frame (tibble) using .data.frame(obs$read()$concat()). mouse cell metadata census$get(\"census_data\")$get(\"mus_musculus\").obs. slicing cell metadata two relevant arguments can passed read(): column_names — character vector indicating metadata columns fetch. Expressions one comparisons Comparisons one <column> <op> <value> <column> <op> <column> Expressions can combine comparisons using && || op one < | > | <= | >= | == | != %% learn metadata columns available fetching filtering can directly look keys cell metadata. soma_joinid special SOMADataFrame column used join operations. definition columns can found Census schema. can used fetch specific columns specific rows matching condition. latter need know values looking priori. example let’s see possible values available sex. can load cell metadata fetching column sex. can see three different values sex, \"male\", \"female\" \"unknown\". information can fetch cell metatadata specific sex value, example \"unknown\". can use column_names value_filter perform specific queries. 
example let’s fetch disease column cell_type \"B cell\" tissue_general \"lung\".","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\" unique(as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = \"sex\")$concat())) #> # A tibble: 3 × 1 #>   sex     #>   <chr>   #> 1 unknown #> 2 female  #> 3 male as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(value_filter = \"sex == 'unknown'\")$concat()) #> # A tibble: 3,251,334 × 21 #>    soma_joinid dataset_id  assay assay_ontology_term_id cell_type cell_type_ontology_t…¹ #>          <int> <chr>       <chr> <chr>                  <chr>     <chr>                  #>  1           0 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  2           1 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #>  3           2 
f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  4           3 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  5           4 f171db61-e… 10x … EFO:0009922            extravil… CL:0008036             #>  6           5 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #>  7           6 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  8           7 f171db61-e… 10x … EFO:0009922            extravil… CL:0008036             #>  9           8 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #> 10           9 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #> # ℹ 3,251,324 more rows #> # ℹ abbreviated name: ¹​cell_type_ontology_term_id #> # ℹ 15 more variables: development_stage <chr>, #> #   development_stage_ontology_term_id <chr>, disease <chr>, #> #   disease_ontology_term_id <chr>, donor_id <chr>, is_primary_data <lgl>, #> #   self_reported_ethnicity <chr>, self_reported_ethnicity_ontology_term_id <chr>, #> #   sex <chr>, sex_ontology_term_id <chr>, suspension_type <chr>, tissue <chr>, … cell_metadata_b_cell <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     value_filter = \"cell_type == 'B cell' & tissue_general == 'lung'\",     column_names = \"disease\"   )$concat() ) table(cell_metadata_b_cell) #> disease #> chronic obstructive pulmonary disease                              COVID-19  #>                                  6369                                  2729  #>          hypersensitivity pneumonitis             interstitial lung disease  #>                                    52                                   376  #>                   lung adenocarcinoma             lung large cell carcinoma  #>                                 62351                                  1534  #>              lymphangioleiomyomatosis         non-small cell lung carcinoma  #>                
                   133                                 17484  #>   non-specific interstitial pneumonia                                normal  #>                                   231                                 25461  #>                 pleomorphic carcinoma                             pneumonia  #>                                  1210                                    50  #>                   pulmonary emphysema                    pulmonary fibrosis  #>                                  1512                                  6798  #>                 pulmonary sarcoidosis             small cell lung carcinoma  #>                                     6                                   583  #>          squamous cell lung carcinoma  #>                                 11920"},{"path":"/articles/census_query_extract.html","id":"querying-gene-metadata-var","dir":"Articles","previous_headings":"","what":"Querying gene metadata (var)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census located census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var. Similarly cell metadata, SOMADataFrame thus can also use method read(). mouse gene metadata census$get(\"census_data\")$get(\"mus_musculus\")$ms$get(\"RNA\")$var. Let’s take look metadata available column selection row filtering. exception soma_joinid columns defined Census schema. Similarly cell metadata, can use operations learn fetch gene metadata. 
example, get feature_name feature_length genes \"ENSG00000161798\" \"ENSG00000188229\" can following.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\" as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$read(     value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",     column_names = c(\"feature_name\", \"feature_length\")   )$concat() ) #> # A tibble: 2 × 2 #>   feature_name feature_length #>   <chr>                 <int> #> 1 AQP5                   1884 #> 2 TUBB4B                 2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-seurat","dir":"Articles","previous_headings":"","what":"Querying expression data as Seurat","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"convenient way query fetch expression data use get_seurat method cellxgene.census API. method combines column selection value filtering described obtain slices expression data based metadata queries. method return Seurat object, takes input census object, string organism, cell gene metadata can specify filters column selection described following arguments: obs_column_names — character vector indicating columns select cell metadata. obs_value_filter — expression selection conditions fetch cells meeting criteria. var_column_names — character vector indicating columns select gene metadata. var_value_filter — expression selection conditions fetch genes meeting criteria. example want fetch expression data : Genes \"ENSG00000161798\" \"ENSG00000188229\". \"B cells\" \"lung\" \"COVID-19\". gene metadata adding sex cell metadata. 
full description refer ?cellxgene.census::get_seurat.","code":"library(\"Seurat\")  seurat_obj <- cellxgene.census::get_seurat(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) seurat_obj #> An object of class Seurat  #> 2 features across 2729 samples within 1 assay  #> Active assay: RNA (2 features, 0 variable features) head(seurat_obj[[]]) #>                orig.ident nCount_RNA nFeature_RNA cell_type tissue_general  disease #> cell5946018 SeuratProject          0            0    B cell           lung COVID-19 #> cell5948526 SeuratProject          1            1    B cell           lung COVID-19 #> cell5949180 SeuratProject          0            0    B cell           lung COVID-19 #> cell5949686 SeuratProject          0            0    B cell           lung COVID-19 #> cell5950730 SeuratProject          0            0    B cell           lung COVID-19 #> cell5952197 SeuratProject          0            0    B cell           lung COVID-19 #>                 sex #> cell5946018    male #> cell5948526 unknown #> cell5949180    male #> cell5949686 unknown #> cell5950730    male #> cell5952197 unknown head(seurat_obj$RNA[[]]) #>                 feature_name feature_length #> ENSG00000161798         AQP5           1884 #> ENSG00000188229       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-singlecellexperiment","dir":"Articles","previous_headings":"","what":"Querying expression data as SingleCellExperiment","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"Similarly previous section, get_single_cell_experiment method cellxgene.census API. behaves exactly get_seurat returns SingleCellExperiment object. example, repeat query can simply following. 
full description refer ?cellxgene.census::get_single_cell_experiment.","code":"library(\"SingleCellExperiment\")  sce_obj <- cellxgene.census::get_single_cell_experiment(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) sce_obj #> class: SingleCellExperiment  #> dim: 2 2729  #> metadata(0): #> assays(1): counts #> rownames(2): ENSG00000161798 ENSG00000188229 #> rowData names(2): feature_name feature_length #> colnames(2729): obs5946018 obs5948526 ... obs48038140 obs48038164 #> colData names(4): cell_type tissue_general disease sex #> reducedDimNames(0): #> mainExpName: RNA #> altExpNames(0): head(colData(sce_obj)) #> DataFrame with 6 rows and 4 columns #>              cell_type tissue_general     disease         sex #>            <character>    <character> <character> <character> #> obs5946018      B cell           lung    COVID-19        male #> obs5948526      B cell           lung    COVID-19     unknown #> obs5949180      B cell           lung    COVID-19        male #> obs5949686      B cell           lung    COVID-19     unknown #> obs5950730      B cell           lung    COVID-19        male #> obs5952197      B cell           lung    COVID-19     unknown head(rowData(sce_obj)) #> DataFrame with 2 rows and 2 columns #>                 feature_name feature_length #>                  <character>      <integer> #> ENSG00000161798         AQP5           1884 #> ENSG00000188229       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"close-the-census","dir":"Articles","previous_headings":"Querying expression data as SingleCellExperiment","what":"Close the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"use, census object closed release memory resources. 
also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/comp_bio_census_info.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Learning about the CZ CELLxGENE Census","text":"cellxgene.census R package contains convenient open_soma() API open version Census (stable default). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/comp_bio_census_info.html","id":"census-organization","dir":"Articles","previous_headings":"","what":"Census organization","title":"Learning about the CZ CELLxGENE Census","text":"Census schema defines structure Census. short, can think Census structured collection items stores different pieces information. items parent collection SOMA objects various types can accessed TileDB-SOMA API (documentation). cellxgene.census package contains convenient wrappers TileDB-SOMA API. example function used open Census: cellxgene_census.open_soma().","code":""},{"path":"/articles/comp_bio_census_info.html","id":"main-census-components","dir":"Articles","previous_headings":"Census organization","what":"Main Census components","title":"Learning about the CZ CELLxGENE Census","text":"command created census, SOMACollection, R6 class providing key-value associative map. 
get() method can access two top-level collection members, census_info census_data, instances SOMACollection.","code":""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-info","dir":"Articles","previous_headings":"Census organization","what":"Census summary info","title":"Learning about the CZ CELLxGENE Census","text":"census$get(\"census_info\")$get(\"summary\"): data frame high-level information Census, e.g. build date, total cell count, etc. census$get(\"census_info\")$get(\"datasets\"): data frame datasets CELLxGENE Discover used create Census. census$get(\"census_info\")$get(\"summary_cell_counts\"): data frame cell counts stratified relevant cell metadata Census data Data organism stored independent SOMAExperiment objects specialized form SOMACollection. store data matrix (cell genes), cell metadata, gene metadata, useful components covered notebook. data organized one organism – Homo sapiens: census$get(\"census_data\")$get(\"homo_sapiens\")$obs: Cell metadata census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\"): Data matrices, currently … census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$X$get(\"raw\"): matrix raw counts SOMASparseNDArray census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var: Gene Metadata","code":""},{"path":"/articles/comp_bio_census_info.html","id":"cell-metadata","dir":"Articles","previous_headings":"","what":"Cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"can obtain cell metadata variables directly querying columns corresponding SOMADataFrame. variables can used querying Census case want work specific cells. variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. dataset_id: dataset id encoded census$get(\"census_info\")$get(\"datasets\"). 
tissue_general tissue_general_ontology_term_id: high-level tissue mapping.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\""},{"path":"/articles/comp_bio_census_info.html","id":"gene-metadata","dir":"Articles","previous_headings":"","what":"Gene metadata","title":"Learning about the CZ CELLxGENE Census","text":"Similarly, can obtain gene metadata variables directly querying columns corresponding SOMADataFrame. variables can use querying Census case specific genes interested . variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. 
feature_length: length base pairs gene.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-content-tables","dir":"Articles","previous_headings":"","what":"Census summary content tables","title":"Learning about the CZ CELLxGENE Census","text":"can take quick look high-level Census information looking census$get(\"census_info\")$get(\"summary\"): special interest label-value combinations : total_cell_count total number cells Census. unique_cell_count number unique cells, cells may present twice due meta-analysis consortia-like data. number_donors_homo_sapiens number_donors_mus_musculus number individuals human mouse. guaranteed unique one individual ID may present identical different datasets.","code":"as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #> # A tibble: 7 × 3 #>   soma_joinid label                      value      #>         <int> <chr>                      <chr>      #> 1           0 census_schema_version      1.0.0      #> 2           1 census_build_date          2023-07-25 #> 3           2 dataset_schema_version     3.0.0      #> 4           3 total_cell_count           61656118   #> 5           4 unique_cell_count          37447773   #> 6           5 number_donors_homo_sapiens 13035      #> 7           6 number_donors_mus_musculus 1417"},{"path":"/articles/comp_bio_census_info.html","id":"cell-counts-by-cell-metadata","dir":"Articles","previous_headings":"Census summary content tables","what":"Cell counts by cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"looking census$get(\"census_info)$get(\"summary_cell_counts\") can get general idea cell counts stratified relevant cell metadata. cell metadata included table, can take look cell gene metadata available sections “Cell metadata” “Gene metadata”. 
line retrieves table casts R data frame: combination organism values category cell metadata can take look total_cell_count unique_cell_count cell counts combination. values category specified ontology_term_id label, value’s IDs labels, respectively.","code":"census_counts <- as.data.frame(census$get(\"census_info\")$get(\"summary_cell_counts\")$read()$concat()) head(census_counts) #> # A tibble: 6 × 7 #>   soma_joinid organism     category ontology_term_id unique_cell_count total_cell_count #>         <int> <chr>        <chr>    <chr>                        <int>            <int> #> 1           0 Homo sapiens all      na                        33364242         56400873 #> 2           1 Homo sapiens assay    EFO:0008722                 264166           279635 #> 3           2 Homo sapiens assay    EFO:0008780                  25652            51304 #> 4           3 Homo sapiens assay    EFO:0008919                  89477           206754 #> 5           4 Homo sapiens assay    EFO:0008931                  78750           188248 #> 6           5 Homo sapiens assay    EFO:0008953                   4693             9386 #> # ℹ 1 more variable: label <chr>"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-metadata-included-in-the-summary-counts-table","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell metadata included in the summary counts table","title":"Learning about the CZ CELLxGENE Census","text":"get available cell metadata summary counts table can following. 
Remember cell metadata available, variables omitted creation table.","code":"t(table(census_counts$organism, census_counts$category)) #>                           #>                           Homo sapiens Mus musculus #>   all                                1            1 #>   assay                             19            9 #>   cell_type                        613          248 #>   disease                           64            5 #>   self_reported_ethnicity           26            1 #>   sex                                3            3 #>   suspension_type                    1            1 #>   tissue                           220           66 #>   tissue_general                    54           27"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-counts-for-each-sequencing-assay-in-human-data","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell counts for each sequencing assay in human data","title":"Learning about the CZ CELLxGENE Census","text":"get cell counts sequencing assay type human data, can perform following operations:","code":"human_assay_counts <- census_counts[census_counts$organism == \"Homo sapiens\" & census_counts$category == \"assay\", ] human_assay_counts <- human_assay_counts[order(human_assay_counts$total_cell_count, decreasing = TRUE), ]"},{"path":"/articles/comp_bio_census_info.html","id":"example-number-of-microglial-cells-in-the-census","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: number of microglial cells in the Census","title":"Learning about the CZ CELLxGENE Census","text":"specific term categories shown can directly find number cells term.","code":"census_counts[census_counts$label == \"microglial cell\", ] #> # A tibble: 2 × 7 #>   soma_joinid organism     category  ontology_term_id unique_cell_count total_cell_count #>         <int> <chr>        <chr>     <chr>           
             <int>            <int> #> 1          69 Homo sapiens cell_type CL:0000129                  268114           370771 #> 2        1038 Mus musculus cell_type CL:0000129                   48998            62617 #> # ℹ 1 more variable: label <chr>"},{"path":"/articles/comp_bio_census_info.html","id":"understanding-census-contents-beyond-the-summary-tables","dir":"Articles","previous_headings":"","what":"Understanding Census contents beyond the summary tables","title":"Learning about the CZ CELLxGENE Census","text":"using pre-computed tables census$get(\"census_info\") easy quick way understand contents Census, falls short want learn certain slices Census. example, may want learn : cell types available human liver? total number cells lung datasets stratified sequencing technology? sex distribution cells brain mouse? diseases available T cells? questions can answered directly querying cell metadata shown examples .","code":""},{"path":"/articles/comp_bio_census_info.html","id":"example-all-cell-types-available-in-human","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: all cell types available in human","title":"Learning about the CZ CELLxGENE Census","text":"exemplify process accessing slicing cell metadata summary stats, let’s start trivial example take look human cell types available Census: number rows total number cells humans. Now, wish get cell counts per cell type can work data frame. addition, focus cells marked is_primary_data=TRUE ensures de-duplicate cells appear CELLxGENE Discover. number unique cells. Now let’s look counts per cell type: shows abundant cell types “glutamatergic neuron”, “CD8-positive, alpha-beta T cell”, “CD4-positive, alpha-beta T cell”. Now let’s take look number unique cell types: total number different cell types human. information example can quickly obtained summary table census$get(\"census-info\")$get(\"summary_cell_counts\"). 
examples complex can achieved accessing cell metadata.","code":"as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = c(\"cell_type\", \"is_primary_data\")   )$concat() ) #> # A tibble: 56,400,873 × 2 #>    cell_type                     is_primary_data #>    <chr>                         <lgl>           #>  1 syncytiotrophoblast cell      FALSE           #>  2 placental villous trophoblast FALSE           #>  3 syncytiotrophoblast cell      FALSE           #>  4 syncytiotrophoblast cell      FALSE           #>  5 extravillous trophoblast      FALSE           #>  6 placental villous trophoblast FALSE           #>  7 syncytiotrophoblast cell      FALSE           #>  8 extravillous trophoblast      FALSE           #>  9 placental villous trophoblast FALSE           #> 10 syncytiotrophoblast cell      FALSE           #> # ℹ 56,400,863 more rows human_cell_types <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = \"cell_type\",     value_filter = \"is_primary_data == TRUE\"   )$concat() )$cell_type length(human_cell_types) #> [1] 33364242 human_cell_type_counts <- table(human_cell_types) sort(human_cell_type_counts, decreasing = TRUE)[1:10] #> human_cell_types #>                          neuron            glutamatergic neuron  #>                         2673669                         1541605  #> CD4-positive, alpha-beta T cell CD8-positive, alpha-beta T cell  #>                         1258976                         1235987  #>              classical monocyte                          B cell  #>                         1030996                          908651  #>                     native cell             natural killer cell  #>                          889262                          768755  #>                      macrophage                 oligodendrocyte  #>                          721687                          710242 dim(human_cell_type_counts) #> [1] 
599"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-types-available-in-human-liver","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: cell types available in human liver","title":"Learning about the CZ CELLxGENE Census","text":"Similar example , can learn cell types available specific tissue, e.g. liver. achieve goal just need limit cell metadata tissue. use information cell metadata variable tissue_general. variable contains high-level tissue label cells Census: cell types cell counts human liver.","code":"human_liver_cell_types <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = \"cell_type\",     value_filter = \"is_primary_data == TRUE && tissue_general == 'liver'\"   )$concat() )$cell_type sort(table(human_liver_cell_types), decreasing = TRUE)[1:10] #> human_liver_cell_types #>                          T cell                     hepatoblast  #>                           85739                           58447  #>                 neoplastic cell                    erythroblast  #>                           52431                           45605  #>                        monocyte                      hepatocyte  #>                           31388                           28309  #>             natural killer cell    periportal region hepatocyte  #>                           26871                           23509  #>                      macrophage centrilobular region hepatocyte  #>                           16707                           15819"},{"path":"/articles/comp_bio_census_info.html","id":"example-diseased-t-cells-in-human-tissues","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: diseased T cells in human tissues","title":"Learning about the CZ CELLxGENE Census","text":"example going get counts diseased cells annotated T cells. 
sake example focus “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”: cell counts annotated indicated disease across human tissues “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”.","code":"t_cells_diseased <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = c(\"disease\", \"tissue_general\"),     value_filter = \"is_primary_data == TRUE && disease != 'normal' && cell_type %in% c('CD8-positive, alpha-beta T cell', 'CD4-positive, alpha-beta T cell')\"   )$concat() ) print(table(t_cells_diseased)) #>                                        tissue_general #> disease                                 adipose tissue adrenal gland  blood blood clot #>   B-cell non-Hodgkin lymphoma                        0             0  62499          0 #>   breast cancer                                      0             0      0          0 #>   chronic obstructive pulmonary disease              0             0      0          0 #>   chronic rhinitis                                   0             0      0          0 #>   clear cell renal carcinoma                         0             0   6548          0 #>   COVID-19                                           0             0 819428          0 #>   Crohn disease                                      0             0      0          0 #>   cystic fibrosis                                    0             0      0          0 #>   Down syndrome                                      0             0      0          0 #>   follicular lymphoma                                0             0      0          0 #>   influenza                                          0             0   8871          0 #>   interstitial lung disease                          0             0      0          0 #>   kidney benign neoplasm                             0             0     20          0 #>   kidney oncocytoma                                  0             0     16          0 #>   
lung adenocarcinoma                                0           205      0          0 #>                                        tissue_general #> disease                                 bone marrow  brain breast  colon kidney  liver #>   B-cell non-Hodgkin lymphoma                     0      0      0      0      0      0 #>   breast cancer                                   0      0   1850      0      0      0 #>   chronic obstructive pulmonary disease           0      0      0      0      0      0 #>   chronic rhinitis                                0      0      0      0      0      0 #>   clear cell renal carcinoma                      0      0      0      0  20540      0 #>   COVID-19                                        0      0      0      0      0      0 #>   Crohn disease                                   0      0      0  17490      0      0 #>   cystic fibrosis                                 0      0      0      0      0      0 #>   Down syndrome                                 181      0      0      0      0      0 #>   follicular lymphoma                             0      0      0      0      0      0 #>   influenza                                       0      0      0      0      0      0 #>   interstitial lung disease                       0      0      0      0      0      0 #>   kidney benign neoplasm                          0      0      0      0     10      0 #>   kidney oncocytoma                               0      0      0      0   2408      0 #>   lung adenocarcinoma                             0   3274      0      0      0    507 #>                                        tissue_general #> disease                                   lung lymph node   nose pleural fluid #>   B-cell non-Hodgkin lymphoma                0          0      0             0 #>   breast cancer                              0          0      0             0 #>   chronic obstructive pulmonary disease   9382          0      0             0 #>   chronic rhinitis            
               0          0    909             0 #>   clear cell renal carcinoma                 0         36      0             0 #>   COVID-19                               30578          0     13             0 #>   Crohn disease                              0          0      0             0 #>   cystic fibrosis                            7          0      0             0 #>   Down syndrome                              0          0      0             0 #>   follicular lymphoma                        0       1089      0             0 #>   influenza                                  0          0      0             0 #>   interstitial lung disease               1803          0      0             0 #>   kidney benign neoplasm                     0          0      0             0 #>   kidney oncocytoma                          0          0      0             0 #>   lung adenocarcinoma                   215013      24969      0         11558 #>                                        tissue_general #> disease                                 respiratory system saliva small intestine #>   B-cell non-Hodgkin lymphoma                            0      0               0 #>   breast cancer                                          0      0               0 #>   chronic obstructive pulmonary disease                  0      0               0 #>   chronic rhinitis                                       0      0               0 #>   clear cell renal carcinoma                             0      0               0 #>   COVID-19                                               4     41               0 #>   Crohn disease                                          0      0           52029 #>   cystic fibrosis                                        0      0               0 #>   Down syndrome                                          0      0               0 #>   follicular lymphoma                                    0      0               0 #>   influenza                                           
   0      0               0 #>   interstitial lung disease                              0      0               0 #>   kidney benign neoplasm                                 0      0               0 #>   kidney oncocytoma                                      0      0               0 #>   lung adenocarcinoma                                    0      0               0 #>  [ reached getOption(\"max.print\") -- omitted 10 rows ]"},{"path":"/articles/comp_bio_data_integration.html","id":"finding-and-fetching-data-from-mouse-liver-10x-genomics-and-smart-seq2","dir":"Articles","previous_headings":"","what":"Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s load modules needed notebook. Now can open Census. notebook use Tabula Muris Senis data liver contains cells 10X Genomics Smart-Seq2 technologies. Let’s query datasets table Census filtering collection_name “Tabula Muris Senis” dataset_title “liver”. Now can use values dataset_id query load AnnData object cells datasets. can check cell counts 10X Genomics Smart-Seq2 data looking assay metadata.","code":"library(cellxgene.census) library(Seurat) library(patchwork) census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
census_datasets <- census$get(\"census_info\")$get(\"datasets\") census_datasets <- census_datasets$read(value_filter = \"collection_name == 'Tabula Muris Senis'\") census_datasets <- as.data.frame(census_datasets$concat())  # Print rows with liver data census_datasets[grep(\"Liver\", census_datasets$dataset_title), ] #> # A tibble: 2 × 8 #>   soma_joinid collection_id            collection_name collection_doi dataset_id #>         <int> <chr>                    <chr>           <chr>          <chr>      #> 1         525 0b9d8a04-bb9d-44da-aa27… Tabula Muris S… 10.1038/s4158… 4546e757-… #> 2         547 0b9d8a04-bb9d-44da-aa27… Tabula Muris S… 10.1038/s4158… 6202a243-… #> # ℹ 3 more variables: dataset_title <chr>, dataset_h5ad_path <chr>, #> #   dataset_total_cell_count <int> tabula_muris_liver_ids <- c(\"4546e757-34d0-4d17-be06-538318925fcd\", \"6202a243-b713-4e12-9ced-c387f8483dea\")  seurat_obj <- cellxgene.census::get_seurat(   census,   organism = \"Mus musculus\",   obs_value_filter = \"dataset_id %in% tabula_muris_liver_ids\" ) table(seurat_obj$assay) #>  #>  10x 3' v2 Smart-seq2  #>       7294       2859"},{"path":"/articles/comp_bio_data_integration.html","id":"gene-length-normalization-of-smart-seq2-data-","dir":"Articles","previous_headings":"","what":"Gene-length normalization of Smart-Seq2 data.","title":"Integrating multi-dataset slices of data with Seurat","text":"Smart-seq2 read counts normalized gene length. Lets first get gene lengths var.feature_length. Now can use normalize Smart-seq data. let’s split object assay. 
normalize Smart-seq slice using gene lengths merge back single object.","code":"smart_seq_gene_lengths <- seurat_obj[[\"RNA\"]]@meta.features$feature_length seurat_obj.list <- SplitObject(seurat_obj, split.by = \"assay\") seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts <- seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts / smart_seq_gene_lengths seurat_obj <- merge(seurat_obj.list[[1]], seurat_obj.list[[2]])"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-with-seurat","dir":"Articles","previous_headings":"","what":"Integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"use native integration capabilities Seurat. comprehensive usage best practices Seurat intergation please refer doc site Seurat.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"inspecting-data-prior-to-integration","dir":"Articles","previous_headings":"Integration with Seurat","what":"Inspecting data prior to integration","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s take look strength batch effects data. perform embedding visualization via UMAP. Let’s basic data normalization variable gene selection now perform PCA UMAP   can see batch effects strong cells cluster primarily assay cell_type. 
Properly integrated embedding principle cluster primarily cell_type, assay best randomly distributed.","code":"seurat_obj <- SCTransform(seurat_obj) seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = \"vst\", nfeatures = 2000) seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj)) seurat_obj <- RunUMAP(seurat_obj, dims = 1:10) # By assay p1 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"data-integration-with-seurat","dir":"Articles","previous_headings":"Integration with Seurat","what":"Data integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"Whenever query fetch Census data multiple datasets integration needs performed evidenced batch effects observed. paramaters Seurat used notebook selected model run quickly. best practices integration single-cell data using Seurat please refer documentation page. seurat_d reading article integrated cell atlas human lung health disease Sikkema et al. perfomed integration 43 datasets Lung. focus metadata Census can batch information integration.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id","title":"Integrating multi-dataset slices of data with Seurat","text":"cells Census annotated dataset come \"dataset_id\". great place start integration. let’s run Seurat integration pipeline. First define model batch set dataset_id. Firs normalize select variable genes seperated batch key dataset_id Now perform integration. Let’s inspect results normalization UMAP visulization. plot UMAP.   Great! can see clustering longer mainly driven assay, albeit still contributing . 
Great! can see clustering longer mainly driven assay, albeit still contributing .","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- ScaleData(seurat_obj.combined, verbose = FALSE) seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id","title":"Integrating multi-dataset slices of data with Seurat","text":"Similar dataset_id, cells Census annotated donor_id. definition donor_id depends dataset left discretion data curators. However still rich information can used batch variable integration. 
donor_id guaranteed unique across cells Census, strongly recommend concatenating dataset_id donor_id use batch separator Seurat Now perform integration. inspect new results UMAP. Plot UMAP.   can see using dataset_id donor_id batch cells now mostly cluster cell type.","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = \"donor_id\") })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") #> Finding all pairwise anchors #> Running CCA #> Merging objects #> Finding neighborhoods #> Finding anchors #>  Found 7026 anchors #> Filtering anchors #>  Retained 4880 anchors seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") #> Merging dataset 1 into 2 #> Extracting anchors for merged samples #> Finding integration vectors #> Finding integration vector weights #> Integrating data DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) #> 12:19:28 UMAP embedding parameters a = 0.9922 b = 1.112 #> 12:19:28 Read 10153 rows and found 30 numeric columns #> 12:19:28 Using Annoy for neighbor search, n_neighbors = 30 #> 12:19:28 Building Annoy index with metric = cosine, n_trees = 50 #> 0%   10   20   30   40   50   60   70   80   90   100% #> 
[----|----|----|----|----|----|----|----|----|----| #> **************************************************| #> 12:19:29 Writing NN index file to temp file /var/folders/9q/j_1zpqns7vz_s5n9l6_b943r0000gr/T//Rtmprh9z8Q/filea1e85b740dde #> 12:19:29 Searching Annoy index using 1 thread, search_k = 3000 #> 12:19:32 Annoy recall = 100% #> 12:19:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 #> 12:19:33 Initializing from normalized Laplacian + noise (using irlba) #> 12:19:33 Commencing optimization for 200 epochs, with 410804 positive edges #> 12:19:38 Optimization finished # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id-assay_ontology_term_id-suspension_type-","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id + assay_ontology_term_id + suspension_type.","title":"Integrating multi-dataset slices of data with Seurat","text":"cases one dataset may contain multiple assay types /multiple suspension types (cell vs nucleus), important consider metadata batches. Therefore, comprehensive definition batch Census can accomplished combining cell metadata dataset_id, donor_id, assay_ontology_term_id suspension_type, latter encode EFO ids assay types. example, two datasets used contain cells one assay , one suspension type . Thus make difference include metadata part batch. implementation look line","code":"# EXAMPLE, DON'T RUN.  
# split the dataset into a list of seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = c(\"donor_id\", \"assay_ontology_term_id\", \"suspension_type\")) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list)  # integrate seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\")"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Summarizing cell and gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). open Census, close census$close(). can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma(). can learn cellxgene.census methods accessing corresponding documentation. example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"summarizing-cell-metadata","dir":"Articles","previous_headings":"","what":"Summarizing cell metadata","title":"Summarizing cell and gene metadata","text":"Census open can use TileDB-SOMA methods SOMACollection. can thus access metadata SOMADataFrame objects encoding cell gene metadata. Tips: can read entire SOMADataFrame R using .data.frame(soma_df$read()$concat()). 
Queries much faster request DataFrame columns required analysis (e.g. column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\")). can also refine query results using value_filter, filter census matching records.","code":""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-all-cell-types","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize all cell types","title":"Summarizing cell and gene metadata","text":"example reads cell metadata (obs) R data frame summarize variety ways.","code":"human <- census$get(\"census_data\")$get(\"homo_sapiens\")  # Read obs into an R data frame (tibble). obs_df <- as.data.frame(human$obs$read(   column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\") )$concat())  # Find all unique values in the cell_type_ontology_term_id column. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id)  cat(paste(   \"There are\",   length(unique_cell_type_ontology_term_id),   \"cell types in the Census! The first few are:\" )) #> There are 613 cell types in the Census! The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0000525\" \"CL:2000060\" \"CL:0008036\" \"CL:0002488\" \"CL:0002343\" \"CL:0000084\""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-a-subset-of-cell-types-selected-with-a-value_filter","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize a subset of cell types, selected with a value_filter","title":"Summarizing cell and gene metadata","text":"example utilizes SOMA “value filter” read subset cells tissue_ontology_term_id equal UBERON:0002048 (lung tissue), summarizes query result. can also define much complex value filters. 
example: combine terms use %% operator query multiple values","code":"# Read cell_type terms for cells which have a specific tissue term LUNG_TISSUE <- \"UBERON:0002048\"  obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = paste(\"tissue_ontology_term_id == '\", LUNG_TISSUE, \"'\", sep = \"\") )$concat())  # Find all unique values in the cell_type_ontology_term_id column as an R data frame. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id) cat(paste(   \"There are \",   length(unique_cell_type_ontology_term_id),   \" cell types in the Census where tissue_ontology_term_id == \",   LUNG_TISSUE,   \"!\\nThe first few are:\",   sep = \"\" )) #> There are 185 cell types in the Census where tissue_ontology_term_id == UBERON:0002048! #> The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0002063\" \"CL:0000775\" \"CL:0001044\" \"CL:0001050\" \"CL:0000814\" \"CL:0000071\"  # Report the 10 most common top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] cat(paste(\"The top 10 cell types where tissue_ontology_term_id ==\", LUNG_TISSUE)) #> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048 print(top_10) #>  #> CL:0000003 CL:0000583 CL:0000625 CL:0000624 CL:0000235 CL:0002063 CL:0000860 CL:0000623  #>     562038     526859     323985     323610     266333     255425     205013     164944  #> CL:0001064 CL:0002632  #>     149067     132243 # You can also do more complex queries, such as testing for inclusion in a list of values obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = \"tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:OOO2084', 'UBERON:0002080')\" )$concat())  # Summarize top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] print(top_10) #>  #> CL:0000746 CL:0008034 CL:0002548 CL:0000115 CL:0002131 CL:0000763 CL:0000669 
CL:0000003  #>     159096      84750      79618      64190      61830      32088      27515      22707  #> CL:0000057 CL:0002144  #>      20117      18593"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"full-census-metadata-stats","dir":"Articles","previous_headings":"","what":"Full Census metadata stats","title":"Summarizing cell and gene metadata","text":"example queries organisms Census, summarizes diversity various metadata labels.","code":"cols_to_query <- c(   \"cell_type_ontology_term_id\",   \"assay_ontology_term_id\",   \"tissue_ontology_term_id\" )  total_cells <- 0 for (organism in census$get(\"census_data\")$names()) {   print(organism)   obs_df <- as.data.frame(     census$get(\"census_data\")$get(organism)$obs$read(column_names = cols_to_query)$concat()   )   total_cells <- total_cells + nrow(obs_df)   for (col in cols_to_query) {     cat(paste(\"  Unique \", col, \" values: \", length(unique(obs_df[[col]])), \"\\n\", sep = \"\"))   } } #> [1] \"mus_musculus\" #>   Unique cell_type_ontology_term_id values: 248 #>   Unique assay_ontology_term_id values: 9 #>   Unique tissue_ontology_term_id values: 66 #> [1] \"homo_sapiens\" #>   Unique cell_type_ontology_term_id values: 613 #>   Unique assay_ontology_term_id values: 19 #>   Unique tissue_ontology_term_id values: 220 cat(paste(\"Complete Census contains\", total_cells, \"cells.\")) #> Complete Census contains 61656118 cells."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"close-the-census","dir":"Articles","previous_headings":"Full Census metadata stats","what":"Close the census","title":"Summarizing cell and gene metadata","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. 
Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Chan Zuckerberg Initiative. Author, maintainer, copyright holder, funder.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Chan Zuckerberg Initiative (2023). cellxgene.census: CZ CELLxGENE Discover Cell Census. R package version 0.0.0.9000, https://github.com/chanzuckerberg/cellxgene-census.","code":"@Manual{,   title = {cellxgene.census: CZ CELLxGENE Discover Cell Census},   author = {{Chan Zuckerberg Initiative}},   year = {2023},   note = {R package version 0.0.0.9000},   url = {https://github.com/chanzuckerberg/cellxgene-census}, }"},{"path":"/index.html","id":"r-package-of-cz-cellxgene-discover-census","dir":"","previous_headings":"","what":"CZ CELLxGENE Discover Cell Census","title":"CZ CELLxGENE Discover Cell Census","text":"documentation R package cellxgene.census part Census CZ CELLxGENE Discover. full details Census data capabilities please go main Census site. cellxgene.census provides API efficiently access cloud-hosted Census single-cell data R. just seconds users can access slice Census data using cell gene filters across hundreds single-cell datasets. Census data can fetched iterative fashion bigger--memory slices data, quickly exported basic R structures, well Seurat SingleCellExperiment objects downstream analysis.","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"CZ CELLxGENE Discover Cell Census","text":"R session, first install tiledb R-Universe, latest release CRAN yet available. 
install cellxgene.census R-Universe.","code":"install.packages(\"tiledb\", repos = \"https://tiledb-inc.r-universe.dev\") install.packages(\"cellxgene.census\", repos = \"https://chanzuckerberg.r-universe.dev\")"},{"path":"/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"CZ CELLxGENE Discover Cell Census","text":"Check vignettes “Articles” section navigation bar site. highly recommend following vignettes starting point: Querying fetching single-cell data cell/gene metadata Learning CZ CELLxGENE Discover Census can also check quick start guide main Census site.","code":""},{"path":"/index.html","id":"example-seurat-and-singlecellexperiment-query","dir":"","previous_headings":"Usage","what":"Example Seurat and SingleCellExperiment query","title":"CZ CELLxGENE Discover Cell Census","text":"following creates Seurat object -demand sympathetic neurons Census filtering genes ENSG00000161798, ENSG00000188229. following retrieves data SingleCellExperiment object.","code":"library(\"cellxgene.census\") library(\"Seurat\")  census = open_soma()  organism = \"Homo sapiens\" gene_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" cell_filter =  \"cell_type == 'sympathetic neuron'\" cell_columns = c(\"assay\", \"cell_type\", \"tissue\", \"tissue_general\", \"suspension_type\", \"disease\")  seurat_obj = get_seurat(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns ) library(\"SingleCellExperiment\")  sce_obj = get_single_cell_experiment(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns )"},{"path":"/index.html","id":"for-more-help","dir":"","previous_headings":"","what":"For More Help","title":"CZ CELLxGENE Discover Cell Census","text":"help, please go visit main Census site. believe found security issue, appreciate notification. 
Please send email security@chanzuckerberg.com.","code":""},{"path":"/reference/download_source_h5ad.html","id":null,"dir":"Reference","previous_headings":"","what":"Download source H5AD to local file name. — download_source_h5ad","title":"Download source H5AD to local file name. — download_source_h5ad","text":"Download source H5AD local file name.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(   dataset_id,   file,   overwrite = FALSE,   census_version = \"stable\",   census = NULL )"},{"path":"/reference/download_source_h5ad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download source H5AD to local file name. — download_source_h5ad","text":"dataset_id dataset_id interest. file Local file name store H5AD file. overwrite TRUE allow overwriting existing file. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling download_source_h5ad() multiple times.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download source H5AD to local file name. 
— download_source_h5ad","text":"","code":"download_source_h5ad(\"0895c838-e550-48a3-a777-dbcd35d30272\", \"/tmp/data.h5ad\", overwrite = TRUE)"},{"path":"/reference/get_census_version_description.html","id":null,"dir":"Reference","previous_headings":"","what":"Get release description for a Census version — get_census_version_description","title":"Get release description for a Census version — get_census_version_description","text":"Get release description Census version","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get release description for a Census version — get_census_version_description","text":"","code":"get_census_version_description(census_version)"},{"path":"/reference/get_census_version_description.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get release description for a Census version — get_census_version_description","text":"census_version census version name.","code":""},{"path":"/reference/get_census_version_description.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get release description for a Census version — get_census_version_description","text":"List release location metadata","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get release description for a Census version — get_census_version_description","text":"","code":"as.data.frame(get_census_version_description(\"stable\")) #>   release_date release_build #> 1                 2023-07-25 #>                                                  soma.uri soma.s3_region #> 1 s3://cellxgene-data-public/cell-census/2023-07-25/soma/      us-west-2 #>                                                  h5ads.uri h5ads.s3_region #> 1 s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/       us-west-2 #>   
do_not_delete  alias census_version #> 1          TRUE stable         stable"},{"path":"/reference/get_census_version_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the directory of Census releases currently available — get_census_version_directory","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Get directory Census releases currently available","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory()"},{"path":"/reference/get_census_version_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Data frame available cell census releases, including location metadata.","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory() #>            release_date release_build #> stable                     2023-07-25 #> latest                     2023-08-01 #> 2023-05-15                 2023-05-15 #> 2023-07-03                 2023-07-03 #> 2023-07-10                 2023-07-10 #> 2023-07-17                 2023-07-17 #> 2023-07-24                 2023-07-24 #> 2023-07-25                 2023-07-25 #> 2023-08-01                 2023-08-01 #>                                                           soma.uri #> stable     s3://cellxgene-data-public/cell-census/2023-07-25/soma/ #> latest     s3://cellxgene-data-public/cell-census/2023-08-01/soma/ #> 2023-05-15 
s3://cellxgene-data-public/cell-census/2023-05-15/soma/ #> 2023-07-03 s3://cellxgene-data-public/cell-census/2023-07-03/soma/ #> 2023-07-10 s3://cellxgene-data-public/cell-census/2023-07-10/soma/ #> 2023-07-17 s3://cellxgene-data-public/cell-census/2023-07-17/soma/ #> 2023-07-24 s3://cellxgene-data-public/cell-census/2023-07-24/soma/ #> 2023-07-25 s3://cellxgene-data-public/cell-census/2023-07-25/soma/ #> 2023-08-01 s3://cellxgene-data-public/cell-census/2023-08-01/soma/ #>            soma.s3_region #> stable          us-west-2 #> latest          us-west-2 #> 2023-05-15      us-west-2 #> 2023-07-03      us-west-2 #> 2023-07-10      us-west-2 #> 2023-07-17      us-west-2 #> 2023-07-24      us-west-2 #> 2023-07-25      us-west-2 #> 2023-08-01      us-west-2 #>                                                           h5ads.uri #> stable     s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/ #> latest     s3://cellxgene-data-public/cell-census/2023-08-01/h5ads/ #> 2023-05-15 s3://cellxgene-data-public/cell-census/2023-05-15/h5ads/ #> 2023-07-03 s3://cellxgene-data-public/cell-census/2023-07-03/h5ads/ #> 2023-07-10 s3://cellxgene-data-public/cell-census/2023-07-10/h5ads/ #> 2023-07-17 s3://cellxgene-data-public/cell-census/2023-07-17/h5ads/ #> 2023-07-24 s3://cellxgene-data-public/cell-census/2023-07-24/h5ads/ #> 2023-07-25 s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/ #> 2023-08-01 s3://cellxgene-data-public/cell-census/2023-08-01/h5ads/ #>            h5ads.s3_region do_not_delete  alias #> stable           us-west-2          TRUE stable #> latest           us-west-2         FALSE latest #> 2023-05-15       us-west-2          TRUE        #> 2023-07-03       us-west-2            NA        #> 2023-07-10       us-west-2            NA        #> 2023-07-17       us-west-2            NA        #> 2023-07-24       us-west-2            NA        #> 2023-07-25       us-west-2          TRUE        #> 2023-08-01       us-west-2         
FALSE"},{"path":"/reference/get_presence_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Read the feature dataset presence matrix. — get_presence_matrix","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"Read feature dataset presence matrix.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"get_presence_matrix(census, organism, measurement_name = \"RNA\")"},{"path":"/reference/get_presence_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"census census object cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA.","code":""},{"path":"/reference/get_presence_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"tiledbsoma::matrixZeroBasedView object dataset join id & feature join id dimensions, filled 1s indicating presence. sparse matrix accessed zero-based indexes since join id's may zero.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. on.exit(census$close(), add = TRUE) print(get_presence_matrix(census, \"Homo sapiens\")$dim()) #> Error in private$check_open_for_read_or_write(): Item must be open for read or write. 
s3://cellxgene-data-public/cell-census/2023-07-25/soma/"},{"path":"/reference/get_seurat.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to Seurat — get_seurat","title":"Export Census slices to Seurat — get_seurat","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return Seurat object.","code":""},{"path":"/reference/get_seurat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to Seurat — get_seurat","text":"","code":"get_seurat(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\", data = NULL),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_seurat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to Seurat — get_seurat","text":"census census object, usually returned cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers named character X layers add Seurat assay, names names Seurat slots (counts data) values names layers within X. obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. 
var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_seurat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to Seurat — get_seurat","text":"Seurat object containing sensus slice.","code":""},{"path":"/reference/get_seurat.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to Seurat — get_seurat","text":"","code":"if (FALSE) { census <- open_soma() seurat_obj <- get_seurat(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  seurat_obj  census$close() }"},{"path":"/reference/get_single_cell_experiment.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return SingleCellExperiment object.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"get_single_cell_experiment(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\"),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_single_cell_experiment.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"census census object, usually returned cellxgene.census::open_soma(). 
organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers character vector X layers add assays main experiment; may optionally named set name resulting assay (eg. ‘X_layers = c(counts = \"raw\")’ load X layer “‘raw’” assay “‘counts’”); default, loads X layers obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"SingleCellExperiment object containing sensus slice.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"if (FALSE) { census <- open_soma() sce_obj <- get_single_cell_experiment(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  sce_obj  census$close() }"},{"path":"/reference/get_source_h5ad_uri.html","id":null,"dir":"Reference","previous_headings":"","what":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","title":"Locate source h5ad file for a dataset. 
— get_source_h5ad_uri","text":"Locate source h5ad file dataset.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(dataset_id, census_version = \"stable\", census = NULL)"},{"path":"/reference/get_source_h5ad_uri.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"dataset_id dataset_id interest. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling get_source_h5ad_uri() multiple times.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"list uri optional s3_region.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(\"0895c838-e550-48a3-a777-dbcd35d30272\") #> $uri #> [1] \"s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/0895c838-e550-48a3-a777-dbcd35d30272.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\" #>"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":null,"dir":"Reference","previous_headings":"","what":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"Create SOMATileDBContext suitable using open_soma(). 
Typically open_soma() creates context automatically, one can created separately order set custom configuration options, share multiple open Census handles.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"new_SOMATileDBContext_for_census(census_version_description, ...)"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"census_version_description result get_census_version_description() desired Census version. ... Custom configuration options.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"SOMATileDBContext object open_soma().","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"census_desc <- get_census_version_description(\"stable\") ctx <- new_SOMATileDBContext_for_census(census_desc, \"soma.init_buffer_bytes\" = paste(4 * 1024**3)) census <- open_soma(\"stable\", tiledbsoma_ctx = ctx) #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
census$close()"},{"path":"/reference/open_soma.html","id":null,"dir":"Reference","previous_headings":"","what":"Open the Census — open_soma","title":"Open the Census — open_soma","text":"Open Census","code":""},{"path":"/reference/open_soma.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Open the Census — open_soma","text":"","code":"open_soma(census_version = \"stable\", uri = NULL, tiledbsoma_ctx = NULL)"},{"path":"/reference/open_soma.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Open the Census — open_soma","text":"census_version version Census, e.g., \"stable\". uri URI containing Census SOMA objects open instead released version. (supplied, takes precedence census_version.) tiledbsoma_ctx tiledbsoma::SOMATileDBContext built using new_SOMATileDBContext_for_census(). Optional (created automatically) using census_version context need reused.","code":""},{"path":"/reference/open_soma.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Open the Census — open_soma","text":"Top-level tiledbsoma::SOMACollection object. use, census closed release memory resources, usually .exit(census$close(), add = TRUE). Closing top-level census also close SOMA objects accessed .","code":""},{"path":"/reference/open_soma.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Open the Census — open_soma","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #> # A tibble: 7 × 3 #>   soma_joinid label                      value      #>         <int> <chr>                      <chr>      #> 1           0 census_schema_version      1.0.0      #> 2           1 census_build_date          2023-07-25 #> 3           2 dataset_schema_version     3.0.0      #> 4           3 total_cell_count           61656118   #> 5           4 unique_cell_count          37447773   #> 6           5 number_donors_homo_sapiens 13035      #> 7           6 number_donors_mus_musculus 1417       census$close()"}]

From f657dfc931b77b680a05ce9feccdb08790b64d8e Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Thu, 3 Aug 2023 13:23:50 -0700
Subject: [PATCH 06/14] Make doc site clearer

---
 docs/cellxgene_census_docsite_quick_start.md | 2 +-
 docs/examples.rst                            | 4 ++--
 docs/index.rst                               | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md
index ff9dfe8fe..761a10d74 100644
--- a/docs/cellxgene_census_docsite_quick_start.md
+++ b/docs/cellxgene_census_docsite_quick_start.md
@@ -104,6 +104,7 @@ First we initiate a lazy-evaluation query to access all brain and male cells fro
 
 ```python
 import cellxgene_census
+import tiledbsoma
 
 with cellxgene_census.open_soma() as census:
     
@@ -142,7 +143,6 @@ And you must close the query.
 
 ## R quick start
 
-
 Below are 3 examples of common operations you can do with the Census. As a reminder, the reference documentation for the API can be accessed via `?`:
 
 ```r
diff --git a/docs/examples.rst b/docs/examples.rst
index ab01749cc..53050f6a5 100644
--- a/docs/examples.rst
+++ b/docs/examples.rst
@@ -1,4 +1,4 @@
-Tutorials
-==========
+Python tutorials
+================
 
 API
@@ -26,4 +26,4 @@ Experimental
     :glob:
     :maxdepth: 2
     
-    notebooks/experimental/*
\ No newline at end of file
+    notebooks/experimental/*
diff --git a/docs/index.rst b/docs/index.rst
index adfdc50de..032c7623e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,9 +7,9 @@
 
    cellxgene_census_docsite_installation.md
    cellxgene_census_docsite_quick_start.md
-   examples
    cellxgene_census_docsite_schema.md
    cellxgene_census_docsite_data_release_info.md
    python-api
-   R API <https://chanzuckerberg.github.io/cellxgene-census/r/index.html>
+   examples
+   R API & tutorials <https://chanzuckerberg.github.io/cellxgene-census/r/index.html>
    cellxgene_census_docsite_FAQ.md

From c10949858b656114c8796080245123caca3fe162 Mon Sep 17 00:00:00 2001
From: pablo-gar <pgarcia-nieto@chanzuckerberg.com>
Date: Thu, 3 Aug 2023 14:40:25 -0700
Subject: [PATCH 07/14] Update api/r/cellxgene.census/README.md

Co-authored-by: Andrew Tolopko <atolopko-czi@users.noreply.github.com>
---
 api/r/cellxgene.census/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index 616aa316e..d414b961f 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -12,7 +12,7 @@ Census data can be fetched in an iterative fashion for bigger-than-memory slices
 
 ## Installation
 
-From an R session, first install `tiledb` from R-Universe, the latest release in CRAN is not yet available.
+From an R session, first install `tiledb` from R-Universe, as its latest release is not yet available on CRAN.
 
 ```r
 install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev")

From 5a8b79a67cd68c3562960550fc1f6ef7fd11aefc Mon Sep 17 00:00:00 2001
From: pablo-gar <pgarcia-nieto@chanzuckerberg.com>
Date: Thu, 3 Aug 2023 14:40:37 -0700
Subject: [PATCH 08/14] Update api/r/cellxgene.census/README.md

Co-authored-by: Andrew Tolopko <atolopko-czi@users.noreply.github.com>
---
 api/r/cellxgene.census/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index d414b961f..3f61ba3ec 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -36,7 +36,7 @@ You can also check out out the [quick start guide](https://chanzuckerberg.github
 
 ### Example `Seurat` and `SingleCellExperiment` query
 
-The following creates an `Seurat` object on-demand with all sympathetic neurons in Census and filtering only for the genes `ENSG00000161798`, `ENSG00000188229`.
+The following creates a `Seurat` object on-demand with all sympathetic neurons in Census, filtering only for the genes `ENSG00000107317` and `ENSG00000106034`.
 
 ```r
 library("cellxgene.census")

From 1428d58dac96cc10395e31fea2083f07cfe6baa3 Mon Sep 17 00:00:00 2001
From: pablo-gar <pgarcia-nieto@chanzuckerberg.com>
Date: Thu, 3 Aug 2023 14:40:58 -0700
Subject: [PATCH 09/14] Update api/r/cellxgene.census/README.md

Co-authored-by: Andrew Tolopko <atolopko-czi@users.noreply.github.com>
---
 api/r/cellxgene.census/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index 3f61ba3ec..c5d05d8d2 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -4,7 +4,7 @@
 <!-- badges: start -->
 <!-- badges: end -->
 
-This is the documentation for the R package `cellxgene.census` which is part of Census by CZ CELLxGENE Discover. For full details on Census data and capabilities please go to the [main Census site](https://chanzuckerberg.github.io/cellxgene-census/).  
+This is the documentation for the R package `cellxgene.census` which is part of CZ CELLxGENE Discover Census. For full details on Census data and capabilities please go to the [main Census site](https://chanzuckerberg.github.io/cellxgene-census/).  
 
 `cellxgene.census` provides an API to efficiently access the cloud-hosted Census single-cell data from R. In just a few seconds users can access any slice of Census data using cell or gene filters across hundreds of single-cell datasets. 
 

From 94f3db858aefa014e55fd3d4b120619c7e307599 Mon Sep 17 00:00:00 2001
From: pablo-gar <pgarcia-nieto@chanzuckerberg.com>
Date: Thu, 3 Aug 2023 14:41:17 -0700
Subject: [PATCH 10/14] Update docs/cellxgene_census_docsite_quick_start.md

Co-authored-by: Andrew Tolopko <atolopko-czi@users.noreply.github.com>
---
 docs/cellxgene_census_docsite_quick_start.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md
index 761a10d74..4f909dce5 100644
--- a/docs/cellxgene_census_docsite_quick_start.md
+++ b/docs/cellxgene_census_docsite_quick_start.md
@@ -204,7 +204,7 @@ The output is a `tibble` with over 300K cells meeting our query criteria and the
 
 ### Obtaining a slice as a `Seurat` or `SingleCellExperiment` object 
 
-The following creates an Seurat object on-demand with a smaller set of cells  and filtering only the genes `ENSG00000161798`, `ENSG00000188229`.
+The following creates a Seurat object on-demand with a smaller set of cells, keeping only the genes `ENSG00000107317` and `ENSG00000106034`.
 
 ```r
 library("cellxgene.census")

From 7ce53c479b9c1687e12749e3ce3385fae8b663f7 Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Thu, 3 Aug 2023 16:33:38 -0700
Subject: [PATCH 11/14] Replace '=' with '<-'

---
 api/r/cellxgene.census/README.md             | 14 ++++-----
 docs/cellxgene_census_docsite_quick_start.md | 32 ++++++++++----------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index c5d05d8d2..fbcc7f911 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -42,14 +42,14 @@ The following creates a `Seurat` object on-demand with all sympathetic neurons i
 library("cellxgene.census")
 library("Seurat")
 
-census = open_soma()
+census <- open_soma()
 
-organism = "Homo sapiens"
-gene_filter = "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
-cell_filter =  "cell_type == 'sympathetic neuron'"
-cell_columns = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
+organism <- "Homo sapiens"
+gene_filter <- "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
+cell_filter <- "cell_type == 'sympathetic neuron'"
+cell_columns <- c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
 
-seurat_obj = get_seurat(
+seurat_obj <- get_seurat(
    census = census,
    organism = organism,
    var_value_filter = gene_filter,
@@ -63,7 +63,7 @@ And the following retrieves the same data as a `SingleCellExperiment` object.
 ```r
 library("SingleCellExperiment")
 
-sce_obj = get_single_cell_experiment(
+sce_obj <- get_single_cell_experiment(
    census = census,
    organism = organism,
    var_value_filter = gene_filter,
diff --git a/docs/cellxgene_census_docsite_quick_start.md b/docs/cellxgene_census_docsite_quick_start.md
index 4f909dce5..a3684c50b 100644
--- a/docs/cellxgene_census_docsite_quick_start.md
+++ b/docs/cellxgene_census_docsite_quick_start.md
@@ -160,22 +160,22 @@ The `cellxgene.census` package uses [R6](https://r6.r-lib.org/articles/Introduct
 ```r
 library("cellxgene.census")
 
-census = open_soma()
+census <- open_soma()
 
 # Open obs SOMADataFrame
-cell_metadata = census$get("census_data")$get("homo_sapiens")$get("obs")
+cell_metadata <- census$get("census_data")$get("homo_sapiens")$get("obs")
 
 # Read as Arrow Table
-cell_metadata = cell_metadata$read(
+cell_metadata <- cell_metadata$read(
    value_filter = "sex == 'female' & cell_type %in% c('microglial cell', 'neuron')",
    column_names = c("assay", "cell_type", "sex", "tissue", "tissue_general", "suspension_type", "disease")
 )
 
 # Concatenates results to an Arrow Table
-cell_metadata = cell_metadata$concat()
+cell_metadata <- cell_metadata$concat()
 
 # Convert to R tibble (dataframe)
-cell_metadata = as.data.frame(cell_metadata)
+cell_metadata <- as.data.frame(cell_metadata)
 
 print(cell_metadata)
 
@@ -210,14 +210,14 @@ The following creates a Seurat object on-demand with a smaller set of cells and
 library("cellxgene.census")
 library("Seurat")
 
-census = open_soma()
+census <- open_soma()
 
-organism = "Homo sapiens"
-gene_filter = "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
-cell_filter =  "cell_type == 'sympathetic neuron'"
-cell_columns = c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
+organism <- "Homo sapiens"
+gene_filter <- "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
+cell_filter <- "cell_type == 'sympathetic neuron'"
+cell_columns <- c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease")
 
-seurat_obj = get_seurat(
+seurat_obj <- get_seurat(
    census = census,
    organism = organism,
    var_value_filter = gene_filter,
@@ -241,7 +241,7 @@ Similarly a `SingleCellExperiment` object can be created.
 ```r
 library("SingleCellExperiment")
 
-sce_obj = get_single_cell_experiment(
+sce_obj <- get_single_cell_experiment(
    census = census,
    organism = organism,
    var_value_filter = gene_filter,
@@ -279,8 +279,8 @@ First we initiate a lazy-evaluation query to access all brain and male cells fro
 library("cellxgene.census")
 library("tiledbsoma")
     
-human = census$get("census_data")$get("homo_sapiens")
-query = human$axis_query(
+human <- census$get("census_data")$get("homo_sapiens")
+query <- human$axis_query(
   measurement_name = "RNA",
   obs_query = SOMAAxisQuery$new(
     value_filter = "tissue == 'brain' & sex == 'male'"
@@ -297,11 +297,11 @@ Now we can iterate over the matrix count, as well as the cell and gene metadata.
 ```r
 # Continued from above 
 
-iterator = query$X("raw")$tables()
+iterator <- query$X("raw")$tables()
 # For sparse matrices use query$X("raw")$sparse_matrix()
 
 # Get an iterative slice as an Arrow Table
-raw_slice = iterator$read_next() 
+raw_slice <- iterator$read_next()
 
 #...
 ```

From b1f3051e683eeac0d964b8da60cc616b3d3e0a5d Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Thu, 3 Aug 2023 16:43:27 -0700
Subject: [PATCH 12/14] Update install instructions

---
 api/r/cellxgene.census/README.md              | 11 +++++++++--
 docs/cellxgene_census_docsite_installation.md | 13 ++++++++++---
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index fbcc7f911..3bc3599c0 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -15,13 +15,20 @@ Census data can be fetched in an iterative fashion for bigger-than-memory slices
 From an R session, first install `tiledb` from R-Universe, as the latest release in CRAN is not yet available.
 
 ```r
-install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev")
+install.packages(
+  "tiledb",
+  version = "0.20.2", 
+  repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org') 
+)
 ```
 
 Then install `cellxgene.census` from R-Universe.
 
 ```r
-install.packages("cellxgene.census", repos = "https://chanzuckerberg.r-universe.dev")
+install.packages(
+  "cellxgene.census",
+  repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org') 
+)
 ```
 
 ## Usage
diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index 44a9de9cb..629a259ec 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -32,11 +32,18 @@ If installing in a Databricks notebook environment, use `%pip install`. Do not u
 From an R session, first install `tiledb` from R-Universe, the latest release in CRAN is not yet available.
 
 ```r
-install.packages("tiledb", repos = "https://tiledb-inc.r-universe.dev")
+install.packages(
+  "tiledb",
+  version = "0.20.2", 
+  repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org') 
+)
 ```
 
 Then install `cellxgene.census` from R-Universe.
 
 ```r
-install.packages("cellxgene.census", repos = "https://chanzuckerberg.r-universe.dev")
-```
+install.packages(
+  "cellxgene.census",
+  repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org') 
+)
+```
\ No newline at end of file

From 39eda121075dc63f1a8595aa7853fd83bdd67a58 Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Fri, 4 Aug 2023 10:46:47 -0700
Subject: [PATCH 13/14] Fix installation instructions

---
 api/r/cellxgene.census/README.md              | 6 ++++--
 docs/cellxgene_census_docsite_installation.md | 4 +++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index 3bc3599c0..84cb0c24e 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -12,7 +12,9 @@ Census data can be fetched in an iterative fashion for bigger-than-memory slices
 
 ## Installation
 
-From an R session, first install `tiledb` from R-Universe, as the latest release in CRAN is not yet available.
+From an R session, first install `tiledb` from R-Universe, the latest release in CRAN is not yet available.
+
+If installing from Ubuntu, you may need to install the following libraries via `apt install`:  `libxml2-dev` `libssl-dev` `libcurl4-openssl-dev`
 
 ```r
 install.packages(
@@ -27,7 +29,7 @@ Then install `cellxgene.census` from R-Universe.
 ```r
 install.packages(
   "cellxgene.census",
-  repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org') 
+  repos=c('https://chanzuckerberg.r-universe.dev', 'https://cloud.r-project.org') 
 )
 ```
 
diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index 629a259ec..de1bf4099 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -31,6 +31,8 @@ If installing in a Databricks notebook environment, use `%pip install`. Do not u
 
 From an R session, first install `tiledb` from R-Universe, the latest release in CRAN is not yet available.
 
+If installing from Ubuntu, you may need to install the following libraries via `apt install`:  `libxml2-dev` `libssl-dev` `libcurl4-openssl-dev`
+
 ```r
 install.packages(
   "tiledb",
@@ -44,6 +46,6 @@ Then install `cellxgene.census` from R-Universe.
 ```r
 install.packages(
   "cellxgene.census",
-  repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org') 
+  repos=c('https://chanzuckerberg.r-universe.dev', 'https://cloud.r-project.org') 
 )
 ```
\ No newline at end of file

From 200f36176e7591b78cf7127bae8d000746ebc8e2 Mon Sep 17 00:00:00 2001
From: Pablo E Garcia-Nieto <paedugar@gmail.com>
Date: Fri, 4 Aug 2023 12:51:44 -0700
Subject: [PATCH 14/14] Update installation instructions, and R version

---
 api/r/cellxgene.census/DESCRIPTION            |  2 +-
 api/r/cellxgene.census/README.md              |  6 ++--
 api/r/cellxgene.census/docs/404.html          |  2 +-
 api/r/cellxgene.census/docs/LICENSE-text.html |  2 +-
 api/r/cellxgene.census/docs/LICENSE.html      |  2 +-
 .../articles/census_dataset_presence.html     |  4 +--
 .../docs/articles/census_datasets.html        |  4 +--
 .../articles/comp_bio_data_integration.html   | 22 ++++++-------
 .../comp_bio_summarize_axis_query.html        |  4 +--
 .../cellxgene.census/docs/articles/index.html |  2 +-
 api/r/cellxgene.census/docs/authors.html      |  6 ++--
 api/r/cellxgene.census/docs/index.html        | 33 ++++++++++++-------
 api/r/cellxgene.census/docs/pkgdown.yml       |  2 +-
 .../docs/reference/index.html                 |  2 +-
 api/r/cellxgene.census/docs/search.json       |  2 +-
 docs/cellxgene_census_docsite_installation.md |  6 ++--
 16 files changed, 57 insertions(+), 44 deletions(-)

diff --git a/api/r/cellxgene.census/DESCRIPTION b/api/r/cellxgene.census/DESCRIPTION
index 1eb6b485c..129792d1c 100644
--- a/api/r/cellxgene.census/DESCRIPTION
+++ b/api/r/cellxgene.census/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cellxgene.census
 Title: CZ CELLxGENE Discover Cell Census
-Version: 0.0.0.9000
+Version: 0.0.9.9
 Authors@R: 
     person("Chan Zuckerberg Initiative", email = "soma@chanzuckerberg.com",
            role = c("aut", "cre", "cph", "fnd"))
diff --git a/api/r/cellxgene.census/README.md b/api/r/cellxgene.census/README.md
index 84cb0c24e..733550f78 100644
--- a/api/r/cellxgene.census/README.md
+++ b/api/r/cellxgene.census/README.md
@@ -14,12 +14,14 @@ Census data can be fetched in an iterative fashion for bigger-than-memory slices
 
 From an R session, first install `tiledb` from R-Universe, the latest release in CRAN is not yet available.
 
-If installing from Ubuntu, you may need to install the following libraries via `apt install`:  `libxml2-dev` `libssl-dev` `libcurl4-openssl-dev`
+If installing on Ubuntu, you may need to install the following libraries via `apt install`: `libxml2-dev`, `libssl-dev`, and `libcurl4-openssl-dev`.
+
+If installing on macOS, you will need to install the [`Xcode` developer tools](https://apps.apple.com/us/app/xcode/id497799835?mt=12).
 
 ```r
 install.packages(
   "tiledb",
-  version = "0.20.2", 
+  version = "0.20.3", 
   repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org') 
 )
 ```
diff --git a/api/r/cellxgene.census/docs/404.html b/api/r/cellxgene.census/docs/404.html
index dfb087811..152015e2d 100644
--- a/api/r/cellxgene.census/docs/404.html
+++ b/api/r/cellxgene.census/docs/404.html
@@ -30,7 +30,7 @@
     
     <a class="navbar-brand me-2" href="index.html">cellxgene.census</a>
 
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.0.9000</small>
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.9.9</small>
 
     
     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
diff --git a/api/r/cellxgene.census/docs/LICENSE-text.html b/api/r/cellxgene.census/docs/LICENSE-text.html
index 4191f68a5..341b9771e 100644
--- a/api/r/cellxgene.census/docs/LICENSE-text.html
+++ b/api/r/cellxgene.census/docs/LICENSE-text.html
@@ -10,7 +10,7 @@
     
     <a class="navbar-brand me-2" href="index.html">cellxgene.census</a>
 
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.0.9000</small>
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.9.9</small>
 
     
     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
diff --git a/api/r/cellxgene.census/docs/LICENSE.html b/api/r/cellxgene.census/docs/LICENSE.html
index 2a4a9fb54..ca30efca8 100644
--- a/api/r/cellxgene.census/docs/LICENSE.html
+++ b/api/r/cellxgene.census/docs/LICENSE.html
@@ -10,7 +10,7 @@
     
     <a class="navbar-brand me-2" href="index.html">cellxgene.census</a>
 
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.0.9000</small>
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.9.9</small>
 
     
     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
diff --git a/api/r/cellxgene.census/docs/articles/census_dataset_presence.html b/api/r/cellxgene.census/docs/articles/census_dataset_presence.html
index 6b09a6bd9..f87673a4a 100644
--- a/api/r/cellxgene.census/docs/articles/census_dataset_presence.html
+++ b/api/r/cellxgene.census/docs/articles/census_dataset_presence.html
@@ -184,10 +184,10 @@ <h2 id="identifying-genes-measured-in-a-specific-dataset">Identifying genes meas
 <li>the first dimension of the presence matrix is the dataset’s <code>soma_joinid</code>, as stored in the <code>census_datasets</code> dataframe.</li>
 <li>the second dimension of the presence matrix is the feature’s <code>soma_joinid</code>, as stored in the <code>var</code> dataframe.</li>
 </ul>
-<p>The presence matrix has a method <code>$take()</code> that let’s you slice it by the <code>soma_joinid</code> from dataset and var joinid. And the full presence matrix or slices of it can be then exported to a regular matrix with the method <code>$get_one_based_matrix()</code></p>
+<p>The presence matrix has a method <code>$take()</code> that lets you slice it by <code>soma_joinid</code>s from <code>census_datasets</code> and <code>var</code>. And the full presence matrix, or slices of it, can then be exported to a regular matrix with the method <code>$get_one_based_matrix()</code></p>
 <p>Let’s find out if the the gene <code>"ENSG00000286096"</code> was measured in the dataset with id <code>"97a17473-e2b1-4f31-a544-44a60773e2dd"</code>.</p>
 <div class="sourceCode" id="cb5"><pre class="downlit sourceCode r">
-<code class="sourceCode R"><span><span class="co"># Get somo_joinid for datasets and genes of interest</span></span>
+<code class="sourceCode R"><span><span class="co"># Get soma_joinid for datasets and genes of interest</span></span>
 <span><span class="va">var_joinid</span> <span class="op">&lt;-</span> <span class="va">var_df</span><span class="op">$</span><span class="va">soma_joinid</span><span class="op">[</span><span class="va">var_df</span><span class="op">$</span><span class="va">feature_id</span> <span class="op">==</span> <span class="st">"ENSG00000286096"</span><span class="op">]</span></span>
 <span><span class="va">dataset_joinid</span> <span class="op">&lt;-</span> <span class="va">datasets_df</span><span class="op">$</span><span class="va">soma_joinid</span><span class="op">[</span><span class="va">datasets_df</span><span class="op">$</span><span class="va">dataset_id</span> <span class="op">==</span> <span class="st">"97a17473-e2b1-4f31-a544-44a60773e2dd"</span><span class="op">]</span></span>
 <span></span>
diff --git a/api/r/cellxgene.census/docs/articles/census_datasets.html b/api/r/cellxgene.census/docs/articles/census_datasets.html
index bb88b9c1d..4c334f2ac 100644
--- a/api/r/cellxgene.census/docs/articles/census_datasets.html
+++ b/api/r/cellxgene.census/docs/articles/census_datasets.html
@@ -194,8 +194,8 @@ <h2 id="fetching-the-expression-data-from-a-single-dataset">Fetching the express
 <span><span class="va">obs_query</span> <span class="op">&lt;-</span> <span class="va"><a href="https://rdrr.io/pkg/tiledbsoma/man/SOMAAxisQuery.html" class="external-link">SOMAAxisQuery</a></span><span class="op">$</span><span class="fu">new</span><span class="op">(</span></span>
 <span>  value_filter <span class="op">=</span> <span class="st">"dataset_id == '0bd1a1de-3aee-40e0-b2ec-86c7a30c7149'"</span></span>
 <span><span class="op">)</span></span>
-<span><span class="va">expt_query</span> <span class="op">&lt;-</span><span class="va">census_data</span><span class="op">$</span><span class="fu">get</span><span class="op">(</span><span class="st">"mus_musculus"</span><span class="op">)</span><span class="op">$</span><span class="fu">axis_query</span><span class="op">(</span></span>
-<span>  measurement_name <span class="op">=</span><span class="st">"RNA"</span>,</span>
+<span><span class="va">expt_query</span> <span class="op">&lt;-</span> <span class="va">census_data</span><span class="op">$</span><span class="fu">get</span><span class="op">(</span><span class="st">"mus_musculus"</span><span class="op">)</span><span class="op">$</span><span class="fu">axis_query</span><span class="op">(</span></span>
+<span>  measurement_name <span class="op">=</span> <span class="st">"RNA"</span>,</span>
 <span>  obs_query <span class="op">=</span> <span class="va">obs_query</span></span>
 <span><span class="op">)</span></span>
 <span><span class="va">dataset_seurat</span> <span class="op">&lt;-</span> <span class="va">expt_query</span><span class="op">$</span><span class="fu">to_seurat</span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span>counts <span class="op">=</span> <span class="st">"raw"</span><span class="op">)</span><span class="op">)</span></span>
diff --git a/api/r/cellxgene.census/docs/articles/comp_bio_data_integration.html b/api/r/cellxgene.census/docs/articles/comp_bio_data_integration.html
index 5fedb71c8..4e4b81e98 100644
--- a/api/r/cellxgene.census/docs/articles/comp_bio_data_integration.html
+++ b/api/r/cellxgene.census/docs/articles/comp_bio_data_integration.html
@@ -293,20 +293,20 @@ <h4 id="integration-across-datasets-using-dataset_id-and-controlling-for-batch-u
 <span><span class="co"># Run the standard workflow for visualization and clustering</span></span>
 <span><span class="va">seurat_obj.combined</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://satijalab.org/seurat/reference/RunPCA.html" class="external-link">RunPCA</a></span><span class="op">(</span><span class="va">seurat_obj.combined</span>, npcs <span class="op">=</span> <span class="fl">30</span>, verbose <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span>
 <span><span class="va">seurat_obj.combined</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://satijalab.org/seurat/reference/RunUMAP.html" class="external-link">RunUMAP</a></span><span class="op">(</span><span class="va">seurat_obj.combined</span>, reduction <span class="op">=</span> <span class="st">"pca"</span>, dims <span class="op">=</span> <span class="fl">1</span><span class="op">:</span><span class="fl">30</span><span class="op">)</span></span>
-<span><span class="co">#&gt; 16:10:11 UMAP embedding parameters a = 0.9922 b = 1.112</span></span>
-<span><span class="co">#&gt; 16:10:11 Read 10153 rows and found 30 numeric columns</span></span>
-<span><span class="co">#&gt; 16:10:11 Using Annoy for neighbor search, n_neighbors = 30</span></span>
-<span><span class="co">#&gt; 16:10:11 Building Annoy index with metric = cosine, n_trees = 50</span></span>
+<span><span class="co">#&gt; 12:40:57 UMAP embedding parameters a = 0.9922 b = 1.112</span></span>
+<span><span class="co">#&gt; 12:40:57 Read 10153 rows and found 30 numeric columns</span></span>
+<span><span class="co">#&gt; 12:40:57 Using Annoy for neighbor search, n_neighbors = 30</span></span>
+<span><span class="co">#&gt; 12:40:57 Building Annoy index with metric = cosine, n_trees = 50</span></span>
 <span><span class="co">#&gt; 0%   10   20   30   40   50   60   70   80   90   100%</span></span>
 <span><span class="co">#&gt; [----|----|----|----|----|----|----|----|----|----|</span></span>
 <span><span class="co">#&gt; **************************************************|</span></span>
-<span><span class="co">#&gt; 16:10:13 Writing NN index file to temp file /var/folders/9q/j_1zpqns7vz_s5n9l6_b943r0000gr/T//RtmpJsaOXU/file122861c13d73b</span></span>
-<span><span class="co">#&gt; 16:10:13 Searching Annoy index using 1 thread, search_k = 3000</span></span>
-<span><span class="co">#&gt; 16:10:16 Annoy recall = 100%</span></span>
-<span><span class="co">#&gt; 16:10:16 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30</span></span>
-<span><span class="co">#&gt; 16:10:17 Initializing from normalized Laplacian + noise (using irlba)</span></span>
-<span><span class="co">#&gt; 16:10:18 Commencing optimization for 200 epochs, with 410804 positive edges</span></span>
-<span><span class="co">#&gt; 16:10:23 Optimization finished</span></span></code></pre></div>
+<span><span class="co">#&gt; 12:40:59 Writing NN index file to temp file /var/folders/9q/j_1zpqns7vz_s5n9l6_b943r0000gr/T//Rtmpg3DKZz/file41847a77c42b</span></span>
+<span><span class="co">#&gt; 12:40:59 Searching Annoy index using 1 thread, search_k = 3000</span></span>
+<span><span class="co">#&gt; 12:41:02 Annoy recall = 100%</span></span>
+<span><span class="co">#&gt; 12:41:02 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30</span></span>
+<span><span class="co">#&gt; 12:41:03 Initializing from normalized Laplacian + noise (using irlba)</span></span>
+<span><span class="co">#&gt; 12:41:03 Commencing optimization for 200 epochs, with 410804 positive edges</span></span>
+<span><span class="co">#&gt; 12:41:09 Optimization finished</span></span></code></pre></div>
 <p>Plot the UMAP.</p>
 <div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
 <code class="sourceCode R"><span><span class="co"># By assay</span></span>
diff --git a/api/r/cellxgene.census/docs/articles/comp_bio_summarize_axis_query.html b/api/r/cellxgene.census/docs/articles/comp_bio_summarize_axis_query.html
index 62c4eb5f2..6aac08496 100644
--- a/api/r/cellxgene.census/docs/articles/comp_bio_summarize_axis_query.html
+++ b/api/r/cellxgene.census/docs/articles/comp_bio_summarize_axis_query.html
@@ -144,7 +144,7 @@ <h3 id="example-summarize-all-cell-types">Example: Summarize all cell types<a cl
 <span><span class="va">obs_df</span> <span class="op">&lt;-</span> <span class="va">human</span><span class="op">$</span><span class="va">obs</span><span class="op">$</span><span class="fu">read</span><span class="op">(</span>column_names <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"cell_type"</span><span class="op">)</span><span class="op">)</span></span>
 <span><span class="va">obs_df</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/as.data.frame.html" class="external-link">as.data.frame</a></span><span class="op">(</span><span class="va">obs_df</span><span class="op">$</span><span class="fu">concat</span><span class="op">(</span><span class="op">)</span><span class="op">)</span></span>
 <span></span>
-<span><span class="co"># Find all unique values in the cell_type_ontology_term_id column.</span></span>
+<span><span class="co"># Find all unique values in the cell_type column.</span></span>
 <span><span class="va">unique_cell_type</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/unique.html" class="external-link">unique</a></span><span class="op">(</span><span class="va">obs_df</span><span class="op">$</span><span class="va">cell_type</span><span class="op">)</span></span>
 <span></span>
 <span><span class="fu"><a href="https://rdrr.io/r/base/cat.html" class="external-link">cat</a></span><span class="op">(</span></span>
@@ -166,7 +166,7 @@ <h3 id="example-summarize-a-subset-of-cell-types-selected-with-a-value_filter">E
 <span><span class="va">obs_df</span> <span class="op">&lt;-</span> <span class="va">human</span><span class="op">$</span><span class="va">obs</span><span class="op">$</span><span class="fu">read</span><span class="op">(</span>column_names <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"cell_type"</span><span class="op">)</span>, value_filter <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/paste.html" class="external-link">paste0</a></span><span class="op">(</span><span class="st">"tissue_ontology_term_id == '"</span>, <span class="va">LUNG_TISSUE</span>, <span class="st">"'"</span><span class="op">)</span><span class="op">)</span></span>
 <span><span class="va">obs_df</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/as.data.frame.html" class="external-link">as.data.frame</a></span><span class="op">(</span><span class="va">obs_df</span><span class="op">$</span><span class="fu">concat</span><span class="op">(</span><span class="op">)</span><span class="op">)</span></span>
 <span></span>
-<span><span class="co"># Find all unique values in the cell_type_ontology_term_id column as an R data frame.</span></span>
+<span><span class="co"># Find all unique values in the cell_type column as an R data frame.</span></span>
 <span><span class="va">unique_cell_type</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/unique.html" class="external-link">unique</a></span><span class="op">(</span><span class="va">obs_df</span><span class="op">$</span><span class="va">cell_type</span><span class="op">)</span></span>
 <span><span class="fu"><a href="https://rdrr.io/r/base/cat.html" class="external-link">cat</a></span><span class="op">(</span></span>
 <span>  <span class="st">"There are "</span>,</span>
diff --git a/api/r/cellxgene.census/docs/articles/index.html b/api/r/cellxgene.census/docs/articles/index.html
index 212ce432c..953599a5e 100644
--- a/api/r/cellxgene.census/docs/articles/index.html
+++ b/api/r/cellxgene.census/docs/articles/index.html
@@ -10,7 +10,7 @@
     
     <a class="navbar-brand me-2" href="../index.html">cellxgene.census</a>
 
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.0.9000</small>
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.9.9</small>
 
     
     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
diff --git a/api/r/cellxgene.census/docs/authors.html b/api/r/cellxgene.census/docs/authors.html
index 001f3dd2d..5b522c091 100644
--- a/api/r/cellxgene.census/docs/authors.html
+++ b/api/r/cellxgene.census/docs/authors.html
@@ -10,7 +10,7 @@
     
     <a class="navbar-brand me-2" href="index.html">cellxgene.census</a>
 
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.0.9000</small>
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.9.9</small>
 
     
     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
@@ -72,13 +72,13 @@ <h2 id="citation">Citation</h2>
 
       <p>Chan Zuckerberg Initiative (2023).
 <em>cellxgene.census: CZ CELLxGENE Discover Cell Census</em>.
-R package version 0.0.0.9000, <a href="https://github.com/chanzuckerberg/cellxgene-census" class="external-link">https://github.com/chanzuckerberg/cellxgene-census</a>. 
+R package version 0.0.9.9, <a href="https://github.com/chanzuckerberg/cellxgene-census" class="external-link">https://github.com/chanzuckerberg/cellxgene-census</a>. 
 </p>
       <pre>@Manual{,
   title = {cellxgene.census: CZ CELLxGENE Discover Cell Census},
   author = {{Chan Zuckerberg Initiative}},
   year = {2023},
-  note = {R package version 0.0.0.9000},
+  note = {R package version 0.0.9.9},
   url = {https://github.com/chanzuckerberg/cellxgene-census},
 }</pre>
     </div>
diff --git a/api/r/cellxgene.census/docs/index.html b/api/r/cellxgene.census/docs/index.html
index 25922b658..dce3cd52b 100644
--- a/api/r/cellxgene.census/docs/index.html
+++ b/api/r/cellxgene.census/docs/index.html
@@ -36,7 +36,7 @@
     
     <a class="navbar-brand me-2" href="index.html">cellxgene.census</a>
 
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.0.9000</small>
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.9.9</small>
 
     
     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
@@ -87,18 +87,27 @@ <h6 class="dropdown-header" data-toc-skip>cellxgene.census capabilities</h6>
   <main id="main" class="col-md-9"><div class="section level1">
 <div class="page-header"><h1 id="r-package-of-cz-cellxgene-discover-census">R package of CZ CELLxGENE Discover Census<a class="anchor" aria-label="anchor" href="#r-package-of-cz-cellxgene-discover-census"></a>
 </h1></div>
-<!-- badges: start --><!-- badges: end --><p>This is the documentation for the R package <code>cellxgene.census</code> which is part of Census by CZ CELLxGENE Discover. For full details on Census data and capabilities please go to the <a href="https://chanzuckerberg.github.io/cellxgene-census/" class="external-link">main Census site</a>.</p>
+<!-- badges: start --><!-- badges: end --><p>This is the documentation for the R package <code>cellxgene.census</code> which is part of CZ CELLxGENE Discover Census. For full details on Census data and capabilities please go to the <a href="https://chanzuckerberg.github.io/cellxgene-census/" class="external-link">main Census site</a>.</p>
 <p><code>cellxgene.census</code> provides an API to efficiently access the cloud-hosted Census single-cell data from R. In just a few seconds users can access any slice of Census data using cell or gene filters across hundreds of single-cell datasets.</p>
 <p>Census data can be fetched in an iterative fashion for bigger-than-memory slices of data, or quickly exported to basic R structures, as well as <code>Seurat</code> or <code>SingleCellExperiment</code> objects for downstream analysis.</p>
 <div class="section level2">
 <h2 id="installation">Installation<a class="anchor" aria-label="anchor" href="#installation"></a>
 </h2>
 <p>From an R session, first install <code>tiledb</code> from R-Universe, the latest release in CRAN is not yet available.</p>
+<p>If installing on Ubuntu, you may need to install the following libraries via <code>apt install</code>: <code>libxml2-dev</code>, <code>libssl-dev</code>, and <code>libcurl4-openssl-dev</code>.</p>
+<p>If installing on macOS, you will need to install the <a href="https://apps.apple.com/us/app/xcode/id497799835?mt=12" class="external-link">developer tools <code>Xcode</code></a>.</p>
 <div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
-<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html" class="external-link">install.packages</a></span><span class="op">(</span><span class="st">"tiledb"</span>, repos <span class="op">=</span> <span class="st">"https://tiledb-inc.r-universe.dev"</span><span class="op">)</span></span></code></pre></div>
+<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html" class="external-link">install.packages</a></span><span class="op">(</span></span>
+<span>  <span class="st">"tiledb"</span>,</span>
+<span>  version <span class="op">=</span> <span class="st">"0.20.3"</span>, </span>
+<span>  repos<span class="op">=</span><span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">'https://tiledb-inc.r-universe.dev'</span>,<span class="st">'https://cloud.r-project.org'</span><span class="op">)</span> </span>
+<span><span class="op">)</span></span></code></pre></div>
 <p>Then install <code>cellxgene.census</code> from R-Universe.</p>
 <div class="sourceCode" id="cb2"><pre class="downlit sourceCode r">
-<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html" class="external-link">install.packages</a></span><span class="op">(</span><span class="st">"cellxgene.census"</span>, repos <span class="op">=</span> <span class="st">"https://chanzuckerberg.r-universe.dev"</span><span class="op">)</span></span></code></pre></div>
+<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html" class="external-link">install.packages</a></span><span class="op">(</span></span>
+<span>  <span class="st">"cellxgene.census"</span>,</span>
+<span>  repos<span class="op">=</span><span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">'https://chanzuckerberg.r-universe.dev'</span>, <span class="st">'https://cloud.r-project.org'</span><span class="op">)</span> </span>
+<span><span class="op">)</span></span></code></pre></div>
 </div>
 <div class="section level2">
 <h2 id="usage">Usage<a class="anchor" aria-label="anchor" href="#usage"></a>
@@ -112,19 +121,19 @@ <h2 id="usage">Usage<a class="anchor" aria-label="anchor" href="#usage"></a>
 <div class="section level3">
 <h3 id="example-seurat-and-singlecellexperiment-query">Example <code>Seurat</code> and <code>SingleCellExperiment</code> query<a class="anchor" aria-label="anchor" href="#example-seurat-and-singlecellexperiment-query"></a>
 </h3>
-<p>The following creates an <code>Seurat</code> object on-demand with all sympathetic neurons in Census and filtering only for the genes <code>ENSG00000161798</code>, <code>ENSG00000188229</code>.</p>
+<p>The following creates a <code>Seurat</code> object on-demand with all sympathetic neurons in Census, filtering only for the genes <code>ENSG00000107317</code> and <code>ENSG00000106034</code>.</p>
 <div class="sourceCode" id="cb3"><pre class="downlit sourceCode r">
 <code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="st"><a href="https://github.com/chanzuckerberg/cellxgene-census" class="external-link">"cellxgene.census"</a></span><span class="op">)</span></span>
 <span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="st"><a href="https://satijalab.org/seurat" class="external-link">"Seurat"</a></span><span class="op">)</span></span>
 <span></span>
-<span><span class="va">census</span> <span class="op">=</span> <span class="fu"><a href="reference/open_soma.html">open_soma</a></span><span class="op">(</span><span class="op">)</span></span>
+<span><span class="va">census</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/open_soma.html">open_soma</a></span><span class="op">(</span><span class="op">)</span></span>
 <span></span>
-<span><span class="va">organism</span> <span class="op">=</span> <span class="st">"Homo sapiens"</span></span>
-<span><span class="va">gene_filter</span> <span class="op">=</span> <span class="st">"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"</span></span>
-<span><span class="va">cell_filter</span> <span class="op">=</span>  <span class="st">"cell_type == 'sympathetic neuron'"</span></span>
-<span><span class="va">cell_columns</span> <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"assay"</span>, <span class="st">"cell_type"</span>, <span class="st">"tissue"</span>, <span class="st">"tissue_general"</span>, <span class="st">"suspension_type"</span>, <span class="st">"disease"</span><span class="op">)</span></span>
+<span><span class="va">organism</span> <span class="op">&lt;-</span> <span class="st">"Homo sapiens"</span></span>
+<span><span class="va">gene_filter</span> <span class="op">&lt;-</span> <span class="st">"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"</span></span>
+<span><span class="va">cell_filter</span> <span class="op">&lt;-</span>  <span class="st">"cell_type == 'sympathetic neuron'"</span></span>
+<span><span class="va">cell_columns</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"assay"</span>, <span class="st">"cell_type"</span>, <span class="st">"tissue"</span>, <span class="st">"tissue_general"</span>, <span class="st">"suspension_type"</span>, <span class="st">"disease"</span><span class="op">)</span></span>
 <span></span>
-<span><span class="va">seurat_obj</span> <span class="op">=</span> <span class="fu"><a href="reference/get_seurat.html">get_seurat</a></span><span class="op">(</span></span>
+<span><span class="va">seurat_obj</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/get_seurat.html">get_seurat</a></span><span class="op">(</span></span>
 <span>   census <span class="op">=</span> <span class="va">census</span>,</span>
 <span>   organism <span class="op">=</span> <span class="va">organism</span>,</span>
 <span>   var_value_filter <span class="op">=</span> <span class="va">gene_filter</span>,</span>
@@ -135,7 +144,7 @@ <h3 id="example-seurat-and-singlecellexperiment-query">Example <code>Seurat</cod
 <div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
 <code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="st">"SingleCellExperiment"</span><span class="op">)</span></span>
 <span></span>
-<span><span class="va">sce_obj</span> <span class="op">=</span> <span class="fu"><a href="reference/get_single_cell_experiment.html">get_single_cell_experiment</a></span><span class="op">(</span></span>
+<span><span class="va">sce_obj</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/get_single_cell_experiment.html">get_single_cell_experiment</a></span><span class="op">(</span></span>
 <span>   census <span class="op">=</span> <span class="va">census</span>,</span>
 <span>   organism <span class="op">=</span> <span class="va">organism</span>,</span>
 <span>   var_value_filter <span class="op">=</span> <span class="va">gene_filter</span>,</span>
diff --git a/api/r/cellxgene.census/docs/pkgdown.yml b/api/r/cellxgene.census/docs/pkgdown.yml
index 8bb89f2ff..23f1b0ad1 100644
--- a/api/r/cellxgene.census/docs/pkgdown.yml
+++ b/api/r/cellxgene.census/docs/pkgdown.yml
@@ -8,5 +8,5 @@ articles:
   comp_bio_census_info: comp_bio_census_info.html
   comp_bio_data_integration: comp_bio_data_integration.html
   comp_bio_summarize_axis_query: comp_bio_summarize_axis_query.html
-last_built: 2023-08-03T22:40Z
+last_built: 2023-08-04T19:50Z
 
diff --git a/api/r/cellxgene.census/docs/reference/index.html b/api/r/cellxgene.census/docs/reference/index.html
index b8db002f0..b91e62bf8 100644
--- a/api/r/cellxgene.census/docs/reference/index.html
+++ b/api/r/cellxgene.census/docs/reference/index.html
@@ -10,7 +10,7 @@
     
     <a class="navbar-brand me-2" href="../index.html">cellxgene.census</a>
 
-    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.0.9000</small>
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">0.0.9.9</small>
 
     
     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
diff --git a/api/r/cellxgene.census/docs/search.json b/api/r/cellxgene.census/docs/search.json
index cad4f3ad8..f9509c152 100644
--- a/api/r/cellxgene.census/docs/search.json
+++ b/api/r/cellxgene.census/docs/search.json
@@ -1 +1 @@
-[{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 Chan Zuckerberg Initiative Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"/articles/census_dataset_presence.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Genes measured in each cell (dataset presence matrix)","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version).","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-ids-of-the-census-datasets","dir":"Articles","previous_headings":"","what":"Fetching the IDs of the Census datasets","title":"Genes measured in each cell (dataset presence matrix)","text":"Let’s grab table datasets included Census use table combination presence matrix .","code":"# Grab the experiment containing human data, and the measurement therein with RNA human <- census$get(\"census_data\")$get(\"homo_sapiens\") human_rna <- human$ms$get(\"RNA\")  # The census-wide datasets datasets_df <- as.data.frame(census$get(\"census_info\")$get(\"datasets\")$read()$concat()) print(datasets_df) #> # A tibble: 593 × 8 #>    soma_joinid collection_id     collection_name collection_doi dataset_id dataset_title #>          <int> <chr>             <chr>           <chr>          <chr>      <chr>         #>  1           0 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… f171db61-… donor_p13_tr… #>  2           1 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… ecf2e08e-… All donors t… #>  3           2 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… 74cff64f-… All donors a… #>  4           3 f7cecffa-00b4-45… Mapping single… 10.1016/j.cce… 5af90777-… Single-cell … #>  5           4 3f50314f-bdc9-40… Single-cell se… 10.1016/j.cce… bd65a70f-… Single-cell … #>  6           5 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… ff45e623-… Tabula Sapie… #>  7           6 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… f01bdd17-… Tabula Sapie… #>  8           7 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e6a11140-… Tabula Sapie… #>  9           8 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e5c63d94-… Tabula Sapie… #> 10           9 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… d8732da6-… Tabula Sapie… #> # ℹ 583 more rows #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count 
<int>"},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-dataset-presence-matrix","dir":"Articles","previous_headings":"","what":"Fetching the dataset presence matrix","title":"Genes measured in each cell (dataset presence matrix)","text":"Now let’s fetch dataset presence matrix. convenience, read entire presence matrix (Homo sapiens) sparse matrix. convenience API providing capability: also need var dataframe, read R data frame convenient manipulation:","code":"presence_matrix <- cellxgene.census::get_presence_matrix(census, \"Homo sapiens\", \"RNA\") print(dim(presence_matrix)) #> NULL var_df <- as.data.frame(human_rna$var$read()$concat()) print(var_df) #> # A tibble: 60,664 × 4 #>    soma_joinid feature_id      feature_name feature_length #>          <int> <chr>           <chr>                 <int> #>  1           0 ENSG00000121410 A1BG                   3999 #>  2           1 ENSG00000268895 A1BG-AS1               3374 #>  3           2 ENSG00000148584 A1CF                   9603 #>  4           3 ENSG00000175899 A2M                    6318 #>  5           4 ENSG00000245105 A2M-AS1                2948 #>  6           5 ENSG00000166535 A2ML1                  7156 #>  7           6 ENSG00000256661 A2ML1-AS1               452 #>  8           7 ENSG00000184389 A3GALT2                1023 #>  9           8 ENSG00000128274 A4GALT                 3358 #> 10           9 ENSG00000118017 A4GNT                  1779 #> # ℹ 60,654 more rows"},{"path":"/articles/census_dataset_presence.html","id":"identifying-genes-measured-in-a-specific-dataset","dir":"Articles","previous_headings":"","what":"Identifying genes measured in a specific dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Now dataset table, genes metadata table, dataset presence matrix, can check gene set genes measured specific dataset. Important: presence matrix indexed soma_joinid, positionally indexed. 
words: first dimension presence matrix dataset’s soma_joinid, stored census_datasets dataframe. second dimension presence matrix feature’s soma_joinid, stored var dataframe. Let’s find gene \"ENSG00000286096\" measured dataset id \"97a17473-e2b1-4f31-a544-44a60773e2dd\".","code":"var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"] dataset_joinid <- datasets_df$soma_joinid[datasets_df$dataset_id == \"97a17473-e2b1-4f31-a544-44a60773e2dd\"] is_present <- presence_matrix$take(dataset_joinid, var_joinid)$get_one_based_matrix()[, , drop = TRUE] cat(paste(\"Feature is\", if (is_present) \"present.\" else \"not present.\")) #> Feature is present."},{"path":"/articles/census_dataset_presence.html","id":"identifying-datasets-that-measured-specific-genes","dir":"Articles","previous_headings":"","what":"Identifying datasets that measured specific genes","title":"Genes measured in each cell (dataset presence matrix)","text":"Similarly, can determine datasets measured specific gene set genes.","code":"# Grab the feature's soma_joinid from the var dataframe var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"]  # The presence matrix is indexed by the joinids of the dataset and var dataframes, # so slice out the feature of interest by its joinid. 
dataset_joinids <- datasets_df$soma_joinid[   presence_matrix$take(j = var_joinid)$get_one_based_matrix()[, , drop = TRUE] != 0 ]  # From the datasets dataframe, slice out the datasets which have a joinid in the list print(datasets_df[dataset_joinids, ]) #> # A tibble: 42 × 8 #>    soma_joinid collection_id     collection_name collection_doi dataset_id dataset_title #>          <int> <chr>             <chr>           <chr>          <chr>      <chr>         #>  1           4 3f50314f-bdc9-40… Single-cell se… 10.1016/j.cce… bd65a70f-… Single-cell … #>  2           5 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… ff45e623-… Tabula Sapie… #>  3           6 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… f01bdd17-… Tabula Sapie… #>  4           7 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e6a11140-… Tabula Sapie… #>  5           8 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e5c63d94-… Tabula Sapie… #>  6          10 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… d77ec7d6-… Tabula Sapie… #>  7          11 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… cee11228-… Tabula Sapie… #>  8          13 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… a2d4d33e-… Tabula Sapie… #>  9          14 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… a0754256-… Tabula Sapie… #> 10          18 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… 6d41668c-… Tabula Sapie… #> # ℹ 32 more rows #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count <int>"},{"path":"/articles/census_dataset_presence.html","id":"identifying-all-genes-measured-in-a-dataset","dir":"Articles","previous_headings":"","what":"Identifying all genes measured in a dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Finally, can find set genes measured cells given dataset.","code":"# Slice the dataset(s) of interest, and get the joinid(s) dataset_joinids <- datasets_df$soma_joinid[datasets_df$collection_id == \"17481d16-ee44-49e5-bcf0-28c0780d8c4a\"]  # Slice 
the presence matrix by the first dimension, i.e., by dataset var_joinids <- var_df$soma_joinid[   which(Matrix::colSums(presence_matrix$take(i = dataset_joinids)$get_one_based_matrix()[, , drop = TRUE]) > 0) ]  print(var_df[var_joinids, ]) #> # A tibble: 27,210 × 4 #>    soma_joinid feature_id      feature_name feature_length #>          <int> <chr>           <chr>                 <int> #>  1           0 ENSG00000121410 A1BG                   3999 #>  2           1 ENSG00000268895 A1BG-AS1               3374 #>  3           2 ENSG00000148584 A1CF                   9603 #>  4           3 ENSG00000175899 A2M                    6318 #>  5           4 ENSG00000245105 A2M-AS1                2948 #>  6           5 ENSG00000166535 A2ML1                  7156 #>  7           7 ENSG00000184389 A3GALT2                1023 #>  8           8 ENSG00000128274 A4GALT                 3358 #>  9           9 ENSG00000118017 A4GNT                  1779 #> 10          10 ENSG00000094914 AAAS                   4727 #> # ℹ 27,200 more rows"},{"path":"/articles/census_dataset_presence.html","id":"close-the-census","dir":"Articles","previous_headings":"Identifying all genes measured in a dataset","what":"Close the census","title":"Genes measured in each cell (dataset presence matrix)","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_datasets.html","id":"fetching-the-datasets-table","dir":"Articles","previous_headings":"","what":"Fetching the datasets table","title":"Census Datasets example","text":"Census contains top-level data frame itemizing datasets contained therein. 
can read SOMADataFrame Arrow Table: R data frame: sum cell counts across datasets match number cells across SOMA experiments (human, mouse).","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. census_datasets <- census$get(\"census_info\")$get(\"datasets\")$read()$concat() print(census_datasets) #> Table #> 593 rows x 8 columns #> $soma_joinid <int64 not null> #> $collection_id <large_string not null> #> $collection_name <large_string not null> #> $collection_doi <large_string not null> #> $dataset_id <large_string not null> #> $dataset_title <large_string not null> #> $dataset_h5ad_path <large_string not null> #> $dataset_total_cell_count <int64 not null> census_datasets <- as.data.frame(census_datasets) print(census_datasets[, c(   \"dataset_id\",   \"dataset_title\",   \"dataset_total_cell_count\" )]) #> # A tibble: 593 × 3 #>    dataset_id                           dataset_title             dataset_total_cell_c…¹ #>    <chr>                                <chr>                                      <int> #>  1 f171db61-e57e-4535-a06a-35d8b6ef8f2b donor_p13_trophoblasts                     31497 #>  2 ecf2e08e-2032-4a9e-b466-b65b395f4a02 All donors trophoblasts                    67070 #>  3 74cff64f-9da9-4b2a-9b3b-8a04a1598040 All donors all cell stat…                 286326 #>  4 5af90777-6760-4003-9dba-8f945fec6fdf Single-cell transcriptom…                 270855 #>  5 bd65a70f-b274-4133-b9dd-0d1431b6af34 Single-cell sequencing l…                 167283 #>  6 ff45e623-7f5f-46e3-b47d-56be0341f66b Tabula Sapiens - Pancreas                  13497 #>  7 f01bdd17-4902-40f5-86e3-240d66dd2587 Tabula Sapiens - Salivar…                  27199 #>  8 e6a11140-2545-46bc-929e-da243eed2cae Tabula Sapiens - Heart                     11505 #>  9 e5c63d94-593c-4338-a489-e1048599e751 Tabula Sapiens - Bladder                   
24583 #> 10 d8732da6-8d1d-42d9-b625-f2416c30054b Tabula Sapiens - Trachea                    9522 #> # ℹ 583 more rows #> # ℹ abbreviated name: ¹​dataset_total_cell_count census_data <- census$get(\"census_data\") all_experiments <- lapply(census_data$to_list(), function(it) census_data$get(it$name)) print(all_experiments) #> $mus_musculus #> <SOMAExperiment> #>   uri: s3://cellxgene-data-public/cell-census/2023-07-25/soma/census_data/mus_musculus  #>   arrays: obs*  #>   groups: ms*  #>  #> $homo_sapiens #> <SOMAExperiment> #>   uri: s3://cellxgene-data-public/cell-census/2023-07-25/soma/census_data/homo_sapiens  #>   arrays: obs*  #>   groups: ms* experiments_total_cells <- sum(sapply(all_experiments, function(it) {   nrow(it$obs$read(column_names = c(\"soma_joinid\"))$concat()) })) print(paste(\"Found\", experiments_total_cells, \"cells in all experiments.\")) #> [1] \"Found 61656118 cells in all experiments.\" print(paste(   \"Found\", sum(as.vector(census_datasets$dataset_total_cell_count)),   \"cells in all datasets.\" )) #> [1] \"Found 61656118 cells in all datasets.\""},{"path":"/articles/census_datasets.html","id":"fetching-the-expression-data-from-a-single-dataset","dir":"Articles","previous_headings":"","what":"Fetching the expression data from a single dataset","title":"Census Datasets example","text":"Let’s pick one dataset slice census, turn Seurat -memory object. (requires Seurat package installed beforehand.) 
Create query mouse experiment, “RNA” measurement, dataset_id.","code":"census_datasets[census_datasets$dataset_id == \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\", ] #> # A tibble: 1 × 8 #>   soma_joinid collection_id      collection_name collection_doi dataset_id dataset_title #>         <int> <chr>              <chr>           <chr>          <chr>      <chr>         #> 1         522 0b9d8a04-bb9d-44d… Tabula Muris S… 10.1038/s4158… 0bd1a1de-… Bone marrow … #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count <int> obs_query <- tiledbsoma::SOMAAxisQuery$new(   value_filter = \"dataset_id == '0bd1a1de-3aee-40e0-b2ec-86c7a30c7149'\" ) expt_query <- tiledbsoma::SOMAExperimentAxisQuery$new(   census_data$get(\"mus_musculus\"), \"RNA\",   obs_query = obs_query ) dataset_seurat <- expt_query$to_seurat(c(counts = \"raw\")) #> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package, #> which was just loaded, will retire in October 2023. #> Please refer to R-spatial evolution reports for details, especially #> https://r-spatial.org/r/2023/05/15/evolution4.html. #> It may be desirable to make the sf package available; #> package maintainers should consider adding sf to Suggests:. #> The sp package is now running under evolution status 2 #>      (status 2 uses the sf package in place of rgdal) print(dataset_seurat) #> An object of class Seurat  #> 52392 features across 40220 samples within 1 assay  #> Active assay: RNA (52392 features, 0 variable features)"},{"path":"/articles/census_datasets.html","id":"downloading-the-original-source-h5ad-file-of-a-dataset","dir":"Articles","previous_headings":"","what":"Downloading the original source H5AD file of a dataset","title":"Census Datasets example","text":"can use cellxgene.census::get_source_h5ad_uri() API fetch URI pointing H5AD associated dataset_id. H5AD can download CZ CELLxGENE Discover, may contain additional data-submitter provided information included Census. 
can fetch location cloud directly download system. local H5AD file can used R using SeuratDisk’s anndata converter.","code":"# Option 1: Direct download cellxgene.census::download_source_h5ad(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\",   file = \"/tmp/Tabula_Muris_Senis-bone_marrow.h5ad\",   overwrite = TRUE ) # Option 2: Get location and download via preferred method cellxgene.census::get_source_h5ad_uri(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\") #> $uri #> [1] \"s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\""},{"path":"/articles/census_datasets.html","id":"close-the-census","dir":"Articles","previous_headings":"Downloading the original source H5AD file of a dataset","what":"Close the census","title":"Census Datasets example","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_query_extract.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/census_query_extract.html","id":"querying-cell-metadata-obs","dir":"Articles","previous_headings":"","what":"Querying cell metadata (obs)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census, RNA assays, located census$get(\"census_data\")$get(\"homo_sapiens\")$obs. SOMADataFrame can materialized R data frame (tibble) using .data.frame(obs$read()$concat()). mouse cell metadata census$get(\"census_data\")$get(\"mus_musculus\").obs. slicing cell metadata two relevant arguments can passed read(): column_names — character vector indicating metadata columns fetch. Expressions one comparisons Comparisons one <column> <op> <value> <column> <op> <column> Expressions can combine comparisons using && || op one < | > | <= | >= | == | != %% learn metadata columns available fetching filtering can directly look keys cell metadata. soma_joinid special SOMADataFrame column used join operations. definition columns can found Census schema. can used fetch specific columns specific rows matching condition. latter need know values looking priori. example let’s see possible values available sex. can load cell metadata fetching column sex. can see three different values sex, \"male\", \"female\" \"unknown\". information can fetch cell metatadata specific sex value, example \"unknown\". can use column_names value_filter perform specific queries. 
example let’s fetch disease column cell_type \"B cell\" tissue_general \"lung\".","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\" unique(as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = \"sex\")$concat())) #> # A tibble: 3 × 1 #>   sex     #>   <chr>   #> 1 unknown #> 2 female  #> 3 male as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(value_filter = \"sex == 'unknown'\")$concat()) #> # A tibble: 3,251,334 × 21 #>    soma_joinid dataset_id  assay assay_ontology_term_id cell_type cell_type_ontology_t…¹ #>          <int> <chr>       <chr> <chr>                  <chr>     <chr>                  #>  1           0 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  2           1 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #>  3           2 
f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  4           3 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  5           4 f171db61-e… 10x … EFO:0009922            extravil… CL:0008036             #>  6           5 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #>  7           6 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  8           7 f171db61-e… 10x … EFO:0009922            extravil… CL:0008036             #>  9           8 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #> 10           9 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #> # ℹ 3,251,324 more rows #> # ℹ abbreviated name: ¹​cell_type_ontology_term_id #> # ℹ 15 more variables: development_stage <chr>, #> #   development_stage_ontology_term_id <chr>, disease <chr>, #> #   disease_ontology_term_id <chr>, donor_id <chr>, is_primary_data <lgl>, #> #   self_reported_ethnicity <chr>, self_reported_ethnicity_ontology_term_id <chr>, #> #   sex <chr>, sex_ontology_term_id <chr>, suspension_type <chr>, tissue <chr>, … cell_metadata_b_cell <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     value_filter = \"cell_type == 'B cell' & tissue_general == 'lung'\",     column_names = \"disease\"   )$concat() ) table(cell_metadata_b_cell) #> disease #> chronic obstructive pulmonary disease                              COVID-19  #>                                  6369                                  2729  #>          hypersensitivity pneumonitis             interstitial lung disease  #>                                    52                                   376  #>                   lung adenocarcinoma             lung large cell carcinoma  #>                                 62351                                  1534  #>              lymphangioleiomyomatosis         non-small cell lung carcinoma  #>                
                   133                                 17484  #>   non-specific interstitial pneumonia                                normal  #>                                   231                                 25461  #>                 pleomorphic carcinoma                             pneumonia  #>                                  1210                                    50  #>                   pulmonary emphysema                    pulmonary fibrosis  #>                                  1512                                  6798  #>                 pulmonary sarcoidosis             small cell lung carcinoma  #>                                     6                                   583  #>          squamous cell lung carcinoma  #>                                 11920"},{"path":"/articles/census_query_extract.html","id":"querying-gene-metadata-var","dir":"Articles","previous_headings":"","what":"Querying gene metadata (var)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census located census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var. Similarly cell metadata, SOMADataFrame thus can also use method read(). mouse gene metadata census$get(\"census_data\")$get(\"mus_musculus\")$ms$get(\"RNA\")$var. Let’s take look metadata available column selection row filtering. exception soma_joinid columns defined Census schema. Similarly cell metadata, can use operations learn fetch gene metadata. 
example, get feature_name feature_length genes \"ENSG00000161798\" \"ENSG00000188229\" can following.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\" as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$read(     value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",     column_names = c(\"feature_name\", \"feature_length\")   )$concat() ) #> # A tibble: 2 × 2 #>   feature_name feature_length #>   <chr>                 <int> #> 1 AQP5                   1884 #> 2 TUBB4B                 2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-seurat","dir":"Articles","previous_headings":"","what":"Querying expression data as Seurat","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"convenient way query fetch expression data use get_seurat method cellxgene.census API. method combines column selection value filtering described obtain slices expression data based metadata queries. method return Seurat object, takes input census object, string organism, cell gene metadata can specify filters column selection described following arguments: obs_column_names — character vector indicating columns select cell metadata. obs_value_filter — expression selection conditions fetch cells meeting criteria. var_column_names — character vector indicating columns select gene metadata. var_value_filter — expression selection conditions fetch genes meeting criteria. example want fetch expression data : Genes \"ENSG00000161798\" \"ENSG00000188229\". \"B cells\" \"lung\" \"COVID-19\". gene metadata adding sex cell metadata. 
full description refer ?cellxgene.census::get_seurat.","code":"library(\"Seurat\")  seurat_obj <- cellxgene.census::get_seurat(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) seurat_obj #> An object of class Seurat  #> 2 features across 2729 samples within 1 assay  #> Active assay: RNA (2 features, 0 variable features) head(seurat_obj[[]]) #>                orig.ident nCount_RNA nFeature_RNA cell_type tissue_general  disease #> cell5946018 SeuratProject          0            0    B cell           lung COVID-19 #> cell5948526 SeuratProject          1            1    B cell           lung COVID-19 #> cell5949180 SeuratProject          0            0    B cell           lung COVID-19 #> cell5949686 SeuratProject          0            0    B cell           lung COVID-19 #> cell5950730 SeuratProject          0            0    B cell           lung COVID-19 #> cell5952197 SeuratProject          0            0    B cell           lung COVID-19 #>                 sex #> cell5946018    male #> cell5948526 unknown #> cell5949180    male #> cell5949686 unknown #> cell5950730    male #> cell5952197 unknown head(seurat_obj$RNA[[]]) #>                 feature_name feature_length #> ENSG00000161798         AQP5           1884 #> ENSG00000188229       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-singlecellexperiment","dir":"Articles","previous_headings":"","what":"Querying expression data as SingleCellExperiment","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"Similarly previous section, get_single_cell_experiment method cellxgene.census API. behaves exactly get_seurat returns SingleCellExperiment object. example, repeat query can simply following. 
full description refer ?cellxgene.census::get_single_cell_experiment.","code":"library(\"SingleCellExperiment\")  sce_obj <- cellxgene.census::get_single_cell_experiment(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) sce_obj #> class: SingleCellExperiment  #> dim: 2 2729  #> metadata(0): #> assays(1): counts #> rownames(2): ENSG00000161798 ENSG00000188229 #> rowData names(2): feature_name feature_length #> colnames(2729): obs5946018 obs5948526 ... obs48038140 obs48038164 #> colData names(4): cell_type tissue_general disease sex #> reducedDimNames(0): #> mainExpName: RNA #> altExpNames(0): head(colData(sce_obj)) #> DataFrame with 6 rows and 4 columns #>              cell_type tissue_general     disease         sex #>            <character>    <character> <character> <character> #> obs5946018      B cell           lung    COVID-19        male #> obs5948526      B cell           lung    COVID-19     unknown #> obs5949180      B cell           lung    COVID-19        male #> obs5949686      B cell           lung    COVID-19     unknown #> obs5950730      B cell           lung    COVID-19        male #> obs5952197      B cell           lung    COVID-19     unknown head(rowData(sce_obj)) #> DataFrame with 2 rows and 2 columns #>                 feature_name feature_length #>                  <character>      <integer> #> ENSG00000161798         AQP5           1884 #> ENSG00000188229       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"close-the-census","dir":"Articles","previous_headings":"Querying expression data as SingleCellExperiment","what":"Close the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"use, census object closed release memory resources. 
also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/comp_bio_census_info.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Learning about the CZ CELLxGENE Census","text":"cellxgene.census R package contains convenient open_soma() API open version Census (stable default). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/comp_bio_census_info.html","id":"census-organization","dir":"Articles","previous_headings":"","what":"Census organization","title":"Learning about the CZ CELLxGENE Census","text":"Census schema defines structure Census. short, can think Census structured collection items stores different pieces information. items parent collection SOMA objects various types can accessed TileDB-SOMA API (documentation). cellxgene.census package contains convenient wrappers TileDB-SOMA API. example function used open Census: cellxgene_census.open_soma().","code":""},{"path":"/articles/comp_bio_census_info.html","id":"main-census-components","dir":"Articles","previous_headings":"Census organization","what":"Main Census components","title":"Learning about the CZ CELLxGENE Census","text":"command created census, SOMACollection, R6 class providing key-value associative map. 
get() method can access two top-level collection members, census_info census_data, instances SOMACollection.","code":""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-info","dir":"Articles","previous_headings":"Census organization","what":"Census summary info","title":"Learning about the CZ CELLxGENE Census","text":"census$get(\"census_info\")$get(\"summary\"): data frame high-level information Census, e.g. build date, total cell count, etc. census$get(\"census_info\")$get(\"datasets\"): data frame datasets CELLxGENE Discover used create Census. census$get(\"census_info\")$get(\"summary_cell_counts\"): data frame cell counts stratified relevant cell metadata Census data Data organism stored independent SOMAExperiment objects specialized form SOMACollection. store data matrix (cell genes), cell metadata, gene metadata, useful components covered notebook. data organized one organism – Homo sapiens: census$get(\"census_data\")$get(\"homo_sapiens\")$obs: Cell metadata census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\"): Data matrices, currently … census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$X$get(\"raw\"): matrix raw counts SOMASparseNDArray census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var: Gene Metadata","code":""},{"path":"/articles/comp_bio_census_info.html","id":"cell-metadata","dir":"Articles","previous_headings":"","what":"Cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"can obtain cell metadata variables directly querying columns corresponding SOMADataFrame. variables can used querying Census case want work specific cells. variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. dataset_id: dataset id encoded census$get(\"census_info\")$get(\"datasets\"). 
tissue_general tissue_general_ontology_term_id: high-level tissue mapping.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\""},{"path":"/articles/comp_bio_census_info.html","id":"gene-metadata","dir":"Articles","previous_headings":"","what":"Gene metadata","title":"Learning about the CZ CELLxGENE Census","text":"Similarly, can obtain gene metadata variables directly querying columns corresponding SOMADataFrame. variables can use querying Census case specific genes interested . variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. 
feature_length: length base pairs gene.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-content-tables","dir":"Articles","previous_headings":"","what":"Census summary content tables","title":"Learning about the CZ CELLxGENE Census","text":"can take quick look high-level Census information looking census$get(\"census_info\")$get(\"summary\"): special interest label-value combinations : total_cell_count total number cells Census. unique_cell_count number unique cells, cells may present twice due meta-analysis consortia-like data. number_donors_homo_sapiens number_donors_mus_musculus number individuals human mouse. guaranteed unique one individual ID may present identical different datasets.","code":"as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #> # A tibble: 7 × 3 #>   soma_joinid label                      value      #>         <int> <chr>                      <chr>      #> 1           0 census_schema_version      1.0.0      #> 2           1 census_build_date          2023-07-25 #> 3           2 dataset_schema_version     3.0.0      #> 4           3 total_cell_count           61656118   #> 5           4 unique_cell_count          37447773   #> 6           5 number_donors_homo_sapiens 13035      #> 7           6 number_donors_mus_musculus 1417"},{"path":"/articles/comp_bio_census_info.html","id":"cell-counts-by-cell-metadata","dir":"Articles","previous_headings":"Census summary content tables","what":"Cell counts by cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"looking census$get(\"census_info)$get(\"summary_cell_counts\") can get general idea cell counts stratified relevant cell metadata. cell metadata included table, can take look cell gene metadata available sections “Cell metadata” “Gene metadata”. 
line retrieves table casts R data frame: combination organism values category cell metadata can take look total_cell_count unique_cell_count cell counts combination. values category specified ontology_term_id label, value’s IDs labels, respectively.","code":"census_counts <- as.data.frame(census$get(\"census_info\")$get(\"summary_cell_counts\")$read()$concat()) head(census_counts) #> # A tibble: 6 × 7 #>   soma_joinid organism     category ontology_term_id unique_cell_count total_cell_count #>         <int> <chr>        <chr>    <chr>                        <int>            <int> #> 1           0 Homo sapiens all      na                        33364242         56400873 #> 2           1 Homo sapiens assay    EFO:0008722                 264166           279635 #> 3           2 Homo sapiens assay    EFO:0008780                  25652            51304 #> 4           3 Homo sapiens assay    EFO:0008919                  89477           206754 #> 5           4 Homo sapiens assay    EFO:0008931                  78750           188248 #> 6           5 Homo sapiens assay    EFO:0008953                   4693             9386 #> # ℹ 1 more variable: label <chr>"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-metadata-included-in-the-summary-counts-table","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell metadata included in the summary counts table","title":"Learning about the CZ CELLxGENE Census","text":"get available cell metadata summary counts table can following. 
Remember cell metadata available, variables omitted creation table.","code":"t(table(census_counts$organism, census_counts$category)) #>                           #>                           Homo sapiens Mus musculus #>   all                                1            1 #>   assay                             19            9 #>   cell_type                        613          248 #>   disease                           64            5 #>   self_reported_ethnicity           26            1 #>   sex                                3            3 #>   suspension_type                    1            1 #>   tissue                           220           66 #>   tissue_general                    54           27"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-counts-for-each-sequencing-assay-in-human-data","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell counts for each sequencing assay in human data","title":"Learning about the CZ CELLxGENE Census","text":"get cell counts sequencing assay type human data, can perform following operations:","code":"human_assay_counts <- census_counts[census_counts$organism == \"Homo sapiens\" & census_counts$category == \"assay\", ] human_assay_counts <- human_assay_counts[order(human_assay_counts$total_cell_count, decreasing = TRUE), ]"},{"path":"/articles/comp_bio_census_info.html","id":"example-number-of-microglial-cells-in-the-census","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: number of microglial cells in the Census","title":"Learning about the CZ CELLxGENE Census","text":"specific term categories shown can directly find number cells term.","code":"census_counts[census_counts$label == \"microglial cell\", ] #> # A tibble: 2 × 7 #>   soma_joinid organism     category  ontology_term_id unique_cell_count total_cell_count #>         <int> <chr>        <chr>     <chr>           
             <int>            <int> #> 1          69 Homo sapiens cell_type CL:0000129                  268114           370771 #> 2        1038 Mus musculus cell_type CL:0000129                   48998            62617 #> # ℹ 1 more variable: label <chr>"},{"path":"/articles/comp_bio_census_info.html","id":"understanding-census-contents-beyond-the-summary-tables","dir":"Articles","previous_headings":"","what":"Understanding Census contents beyond the summary tables","title":"Learning about the CZ CELLxGENE Census","text":"using pre-computed tables census$get(\"census_info\") easy quick way understand contents Census, falls short want learn certain slices Census. example, may want learn : cell types available human liver? total number cells lung datasets stratified sequencing technology? sex distribution cells brain mouse? diseases available T cells? questions can answered directly querying cell metadata shown examples .","code":""},{"path":"/articles/comp_bio_census_info.html","id":"example-all-cell-types-available-in-human","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: all cell types available in human","title":"Learning about the CZ CELLxGENE Census","text":"exemplify process accessing slicing cell metadata summary stats, let’s start trivial example take look human cell types available Census: number rows total number cells humans. Now, wish get cell counts per cell type can work data frame. addition, focus cells marked is_primary_data=TRUE ensures de-duplicate cells appear CELLxGENE Discover. number unique cells. Now let’s look counts per cell type: shows abundant cell types “glutamatergic neuron”, “CD8-positive, alpha-beta T cell”, “CD4-positive, alpha-beta T cell”. Now let’s take look number unique cell types: total number different cell types human. information example can quickly obtained summary table census$get(\"census-info\")$get(\"summary_cell_counts\"). 
examples complex can achieved accessing cell metadata.","code":"as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = c(\"cell_type\", \"is_primary_data\")   )$concat() ) #> # A tibble: 56,400,873 × 2 #>    cell_type                     is_primary_data #>    <chr>                         <lgl>           #>  1 syncytiotrophoblast cell      FALSE           #>  2 placental villous trophoblast FALSE           #>  3 syncytiotrophoblast cell      FALSE           #>  4 syncytiotrophoblast cell      FALSE           #>  5 extravillous trophoblast      FALSE           #>  6 placental villous trophoblast FALSE           #>  7 syncytiotrophoblast cell      FALSE           #>  8 extravillous trophoblast      FALSE           #>  9 placental villous trophoblast FALSE           #> 10 syncytiotrophoblast cell      FALSE           #> # ℹ 56,400,863 more rows human_cell_types <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = \"cell_type\",     value_filter = \"is_primary_data == TRUE\"   )$concat() )$cell_type length(human_cell_types) #> [1] 33364242 human_cell_type_counts <- table(human_cell_types) sort(human_cell_type_counts, decreasing = TRUE)[1:10] #> human_cell_types #>                          neuron            glutamatergic neuron  #>                         2673669                         1541605  #> CD4-positive, alpha-beta T cell CD8-positive, alpha-beta T cell  #>                         1258976                         1235987  #>              classical monocyte                          B cell  #>                         1030996                          908651  #>                     native cell             natural killer cell  #>                          889262                          768755  #>                      macrophage                 oligodendrocyte  #>                          721687                          710242 dim(human_cell_type_counts) #> [1] 
599"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-types-available-in-human-liver","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: cell types available in human liver","title":"Learning about the CZ CELLxGENE Census","text":"Similar example , can learn cell types available specific tissue, e.g. liver. achieve goal just need limit cell metadata tissue. use information cell metadata variable tissue_general. variable contains high-level tissue label cells Census: cell types cell counts human liver.","code":"human_liver_cell_types <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = \"cell_type\",     value_filter = \"is_primary_data == TRUE && tissue_general == 'liver'\"   )$concat() )$cell_type sort(table(human_liver_cell_types), decreasing = TRUE)[1:10] #> human_liver_cell_types #>                          T cell                     hepatoblast  #>                           85739                           58447  #>                 neoplastic cell                    erythroblast  #>                           52431                           45605  #>                        monocyte                      hepatocyte  #>                           31388                           28309  #>             natural killer cell    periportal region hepatocyte  #>                           26871                           23509  #>                      macrophage centrilobular region hepatocyte  #>                           16707                           15819"},{"path":"/articles/comp_bio_census_info.html","id":"example-diseased-t-cells-in-human-tissues","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: diseased T cells in human tissues","title":"Learning about the CZ CELLxGENE Census","text":"example going get counts diseased cells annotated T cells. 
sake example focus “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”: cell counts annotated indicated disease across human tissues “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”.","code":"t_cells_diseased <- as.data.frame(   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(     column_names = c(\"disease\", \"tissue_general\"),     value_filter = \"is_primary_data == TRUE && disease != 'normal' && cell_type %in% c('CD8-positive, alpha-beta T cell', 'CD4-positive, alpha-beta T cell')\"   )$concat() ) print(table(t_cells_diseased)) #>                                        tissue_general #> disease                                 adipose tissue adrenal gland  blood blood clot #>   B-cell non-Hodgkin lymphoma                        0             0  62499          0 #>   breast cancer                                      0             0      0          0 #>   chronic obstructive pulmonary disease              0             0      0          0 #>   chronic rhinitis                                   0             0      0          0 #>   clear cell renal carcinoma                         0             0   6548          0 #>   COVID-19                                           0             0 819428          0 #>   Crohn disease                                      0             0      0          0 #>   cystic fibrosis                                    0             0      0          0 #>   Down syndrome                                      0             0      0          0 #>   follicular lymphoma                                0             0      0          0 #>   influenza                                          0             0   8871          0 #>   interstitial lung disease                          0             0      0          0 #>   kidney benign neoplasm                             0             0     20          0 #>   kidney oncocytoma                                  0             0     16          0 #>   
lung adenocarcinoma                                0           205      0          0 #>                                        tissue_general #> disease                                 bone marrow  brain breast  colon kidney  liver #>   B-cell non-Hodgkin lymphoma                     0      0      0      0      0      0 #>   breast cancer                                   0      0   1850      0      0      0 #>   chronic obstructive pulmonary disease           0      0      0      0      0      0 #>   chronic rhinitis                                0      0      0      0      0      0 #>   clear cell renal carcinoma                      0      0      0      0  20540      0 #>   COVID-19                                        0      0      0      0      0      0 #>   Crohn disease                                   0      0      0  17490      0      0 #>   cystic fibrosis                                 0      0      0      0      0      0 #>   Down syndrome                                 181      0      0      0      0      0 #>   follicular lymphoma                             0      0      0      0      0      0 #>   influenza                                       0      0      0      0      0      0 #>   interstitial lung disease                       0      0      0      0      0      0 #>   kidney benign neoplasm                          0      0      0      0     10      0 #>   kidney oncocytoma                               0      0      0      0   2408      0 #>   lung adenocarcinoma                             0   3274      0      0      0    507 #>                                        tissue_general #> disease                                   lung lymph node   nose pleural fluid #>   B-cell non-Hodgkin lymphoma                0          0      0             0 #>   breast cancer                              0          0      0             0 #>   chronic obstructive pulmonary disease   9382          0      0             0 #>   chronic rhinitis            
               0          0    909             0 #>   clear cell renal carcinoma                 0         36      0             0 #>   COVID-19                               30578          0     13             0 #>   Crohn disease                              0          0      0             0 #>   cystic fibrosis                            7          0      0             0 #>   Down syndrome                              0          0      0             0 #>   follicular lymphoma                        0       1089      0             0 #>   influenza                                  0          0      0             0 #>   interstitial lung disease               1803          0      0             0 #>   kidney benign neoplasm                     0          0      0             0 #>   kidney oncocytoma                          0          0      0             0 #>   lung adenocarcinoma                   215013      24969      0         11558 #>                                        tissue_general #> disease                                 respiratory system saliva small intestine #>   B-cell non-Hodgkin lymphoma                            0      0               0 #>   breast cancer                                          0      0               0 #>   chronic obstructive pulmonary disease                  0      0               0 #>   chronic rhinitis                                       0      0               0 #>   clear cell renal carcinoma                             0      0               0 #>   COVID-19                                               4     41               0 #>   Crohn disease                                          0      0           52029 #>   cystic fibrosis                                        0      0               0 #>   Down syndrome                                          0      0               0 #>   follicular lymphoma                                    0      0               0 #>   influenza                                           
   0      0               0 #>   interstitial lung disease                              0      0               0 #>   kidney benign neoplasm                                 0      0               0 #>   kidney oncocytoma                                      0      0               0 #>   lung adenocarcinoma                                    0      0               0 #>  [ reached getOption(\"max.print\") -- omitted 10 rows ]"},{"path":"/articles/comp_bio_data_integration.html","id":"finding-and-fetching-data-from-mouse-liver-10x-genomics-and-smart-seq2","dir":"Articles","previous_headings":"","what":"Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s load modules needed notebook. Now can open Census. notebook use Tabula Muris Senis data liver contains cells 10X Genomics Smart-Seq2 technologies. Let’s query datasets table Census filtering collection_name “Tabula Muris Senis” dataset_title “liver”. Now can use values dataset_id query load AnnData object cells datasets. can check cell counts 10X Genomics Smart-Seq2 data looking assay metadata.","code":"library(cellxgene.census) library(Seurat) library(patchwork) census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
census_datasets <- census$get(\"census_info\")$get(\"datasets\") census_datasets <- census_datasets$read(value_filter = \"collection_name == 'Tabula Muris Senis'\") census_datasets <- as.data.frame(census_datasets$concat())  # Print rows with liver data census_datasets[grep(\"Liver\", census_datasets$dataset_title), ] #> # A tibble: 2 × 8 #>   soma_joinid collection_id            collection_name collection_doi dataset_id #>         <int> <chr>                    <chr>           <chr>          <chr>      #> 1         525 0b9d8a04-bb9d-44da-aa27… Tabula Muris S… 10.1038/s4158… 4546e757-… #> 2         547 0b9d8a04-bb9d-44da-aa27… Tabula Muris S… 10.1038/s4158… 6202a243-… #> # ℹ 3 more variables: dataset_title <chr>, dataset_h5ad_path <chr>, #> #   dataset_total_cell_count <int> tabula_muris_liver_ids <- c(\"4546e757-34d0-4d17-be06-538318925fcd\", \"6202a243-b713-4e12-9ced-c387f8483dea\")  seurat_obj <- cellxgene.census::get_seurat(   census,   organism = \"Mus musculus\",   obs_value_filter = \"dataset_id %in% tabula_muris_liver_ids\" ) table(seurat_obj$assay) #>  #>  10x 3' v2 Smart-seq2  #>       7294       2859"},{"path":"/articles/comp_bio_data_integration.html","id":"gene-length-normalization-of-smart-seq2-data-","dir":"Articles","previous_headings":"","what":"Gene-length normalization of Smart-Seq2 data.","title":"Integrating multi-dataset slices of data with Seurat","text":"Smart-seq2 read counts normalized gene length. Lets first get gene lengths var.feature_length. Now can use normalize Smart-seq data. let’s split object assay. 
normalize Smart-seq slice using gene lengths merge back single object.","code":"smart_seq_gene_lengths <- seurat_obj[[\"RNA\"]]@meta.features$feature_length seurat_obj.list <- SplitObject(seurat_obj, split.by = \"assay\") seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts <- seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts / smart_seq_gene_lengths seurat_obj <- merge(seurat_obj.list[[1]], seurat_obj.list[[2]])"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-with-seurat","dir":"Articles","previous_headings":"","what":"Integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"use native integration capabilities Seurat. comprehensive usage best practices Seurat intergation please refer doc site Seurat.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"inspecting-data-prior-to-integration","dir":"Articles","previous_headings":"Integration with Seurat","what":"Inspecting data prior to integration","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s take look strength batch effects data. perform embedding visualization via UMAP. Let’s basic data normalization variable gene selection now perform PCA UMAP   can see batch effects strong cells cluster primarily assay cell_type. 
Properly integrated embedding principle cluster primarily cell_type, assay best randomly distributed.","code":"seurat_obj <- SCTransform(seurat_obj) seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = \"vst\", nfeatures = 2000) seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj)) seurat_obj <- RunUMAP(seurat_obj, dims = 1:10) # By assay p1 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"data-integration-with-seurat","dir":"Articles","previous_headings":"Integration with Seurat","what":"Data integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"Whenever query fetch Census data multiple datasets integration needs performed evidenced batch effects observed. paramaters Seurat used notebook selected model run quickly. best practices integration single-cell data using Seurat please refer documentation page. seurat_d reading article integrated cell atlas human lung health disease Sikkema et al. perfomed integration 43 datasets Lung. focus metadata Census can batch information integration.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id","title":"Integrating multi-dataset slices of data with Seurat","text":"cells Census annotated dataset come \"dataset_id\". great place start integration. let’s run Seurat integration pipeline. First define model batch set dataset_id. Firs normalize select variable genes seperated batch key dataset_id Now perform integration. Let’s inspect results normalization UMAP visulization. plot UMAP.   Great! can see clustering longer mainly driven assay, albeit still contributing . 
Great! can see clustering longer mainly driven assay, albeit still contributing .","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- ScaleData(seurat_obj.combined, verbose = FALSE) seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id","title":"Integrating multi-dataset slices of data with Seurat","text":"Similar dataset_id, cells Census annotated donor_id. definition donor_id depends dataset left discretion data curators. However still rich information can used batch variable integration. 
donor_id guaranteed unique across cells Census, strongly recommend concatenating dataset_id donor_id use batch separator Seurat Now perform integration. inspect new results UMAP. Plot UMAP.   can see using dataset_id donor_id batch cells now mostly cluster cell type.","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = \"donor_id\") })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") #> Finding all pairwise anchors #> Running CCA #> Merging objects #> Finding neighborhoods #> Finding anchors #>  Found 7026 anchors #> Filtering anchors #>  Retained 4880 anchors seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") #> Merging dataset 1 into 2 #> Extracting anchors for merged samples #> Finding integration vectors #> Finding integration vector weights #> Integrating data DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) #> 12:19:28 UMAP embedding parameters a = 0.9922 b = 1.112 #> 12:19:28 Read 10153 rows and found 30 numeric columns #> 12:19:28 Using Annoy for neighbor search, n_neighbors = 30 #> 12:19:28 Building Annoy index with metric = cosine, n_trees = 50 #> 0%   10   20   30   40   50   60   70   80   90   100% #> 
[----|----|----|----|----|----|----|----|----|----| #> **************************************************| #> 12:19:29 Writing NN index file to temp file /var/folders/9q/j_1zpqns7vz_s5n9l6_b943r0000gr/T//Rtmprh9z8Q/filea1e85b740dde #> 12:19:29 Searching Annoy index using 1 thread, search_k = 3000 #> 12:19:32 Annoy recall = 100% #> 12:19:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 #> 12:19:33 Initializing from normalized Laplacian + noise (using irlba) #> 12:19:33 Commencing optimization for 200 epochs, with 410804 positive edges #> 12:19:38 Optimization finished # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id-assay_ontology_term_id-suspension_type-","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id + assay_ontology_term_id + suspension_type.","title":"Integrating multi-dataset slices of data with Seurat","text":"cases one dataset may contain multiple assay types /multiple suspension types (cell vs nucleus), important consider metadata batches. Therefore, comprehensive definition batch Census can accomplished combining cell metadata dataset_id, donor_id, assay_ontology_term_id suspension_type, latter encode EFO ids assay types. example, two datasets used contain cells one assay , one suspension type . Thus make difference include metadata part batch. implementation look line","code":"# EXAMPLE, DON'T RUN.  
# split the dataset into a list of seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = c(\"donor_id\", \"assay_ontology_term_id\", \"suspension_type\")) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list)  # integrate seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\")"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Summarizing cell and gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). open Census, close census$close(). can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma(). can learn cellxgene.census methods accessing corresponding documentation. example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"summarizing-cell-metadata","dir":"Articles","previous_headings":"","what":"Summarizing cell metadata","title":"Summarizing cell and gene metadata","text":"Census open can use TileDB-SOMA methods SOMACollection. can thus access metadata SOMADataFrame objects encoding cell gene metadata. Tips: can read entire SOMADataFrame R using .data.frame(soma_df$read()$concat()). 
Queries much faster request DataFrame columns required analysis (e.g. column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\")). can also refine query results using value_filter, filter census matching records.","code":""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-all-cell-types","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize all cell types","title":"Summarizing cell and gene metadata","text":"example reads cell metadata (obs) R data frame summarize variety ways.","code":"human <- census$get(\"census_data\")$get(\"homo_sapiens\")  # Read obs into an R data frame (tibble). obs_df <- as.data.frame(human$obs$read(   column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\") )$concat())  # Find all unique values in the cell_type_ontology_term_id column. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id)  cat(paste(   \"There are\",   length(unique_cell_type_ontology_term_id),   \"cell types in the Census! The first few are:\" )) #> There are 613 cell types in the Census! The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0000525\" \"CL:2000060\" \"CL:0008036\" \"CL:0002488\" \"CL:0002343\" \"CL:0000084\""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-a-subset-of-cell-types-selected-with-a-value_filter","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize a subset of cell types, selected with a value_filter","title":"Summarizing cell and gene metadata","text":"example utilizes SOMA “value filter” read subset cells tissue_ontology_term_id equal UBERON:0002048 (lung tissue), summarizes query result. can also define much complex value filters. 
example: combine terms use %% operator query multiple values","code":"# Read cell_type terms for cells which have a specific tissue term LUNG_TISSUE <- \"UBERON:0002048\"  obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = paste(\"tissue_ontology_term_id == '\", LUNG_TISSUE, \"'\", sep = \"\") )$concat())  # Find all unique values in the cell_type_ontology_term_id column as an R data frame. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id) cat(paste(   \"There are \",   length(unique_cell_type_ontology_term_id),   \" cell types in the Census where tissue_ontology_term_id == \",   LUNG_TISSUE,   \"!\\nThe first few are:\",   sep = \"\" )) #> There are 185 cell types in the Census where tissue_ontology_term_id == UBERON:0002048! #> The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0002063\" \"CL:0000775\" \"CL:0001044\" \"CL:0001050\" \"CL:0000814\" \"CL:0000071\"  # Report the 10 most common top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] cat(paste(\"The top 10 cell types where tissue_ontology_term_id ==\", LUNG_TISSUE)) #> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048 print(top_10) #>  #> CL:0000003 CL:0000583 CL:0000625 CL:0000624 CL:0000235 CL:0002063 CL:0000860 CL:0000623  #>     562038     526859     323985     323610     266333     255425     205013     164944  #> CL:0001064 CL:0002632  #>     149067     132243 # You can also do more complex queries, such as testing for inclusion in a list of values obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = \"tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:OOO2084', 'UBERON:0002080')\" )$concat())  # Summarize top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] print(top_10) #>  #> CL:0000746 CL:0008034 CL:0002548 CL:0000115 CL:0002131 CL:0000763 CL:0000669 
CL:0000003  #>     159096      84750      79618      64190      61830      32088      27515      22707  #> CL:0000057 CL:0002144  #>      20117      18593"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"full-census-metadata-stats","dir":"Articles","previous_headings":"","what":"Full Census metadata stats","title":"Summarizing cell and gene metadata","text":"example queries organisms Census, summarizes diversity various metadata labels.","code":"cols_to_query <- c(   \"cell_type_ontology_term_id\",   \"assay_ontology_term_id\",   \"tissue_ontology_term_id\" )  total_cells <- 0 for (organism in census$get(\"census_data\")$names()) {   print(organism)   obs_df <- as.data.frame(     census$get(\"census_data\")$get(organism)$obs$read(column_names = cols_to_query)$concat()   )   total_cells <- total_cells + nrow(obs_df)   for (col in cols_to_query) {     cat(paste(\"  Unique \", col, \" values: \", length(unique(obs_df[[col]])), \"\\n\", sep = \"\"))   } } #> [1] \"mus_musculus\" #>   Unique cell_type_ontology_term_id values: 248 #>   Unique assay_ontology_term_id values: 9 #>   Unique tissue_ontology_term_id values: 66 #> [1] \"homo_sapiens\" #>   Unique cell_type_ontology_term_id values: 613 #>   Unique assay_ontology_term_id values: 19 #>   Unique tissue_ontology_term_id values: 220 cat(paste(\"Complete Census contains\", total_cells, \"cells.\")) #> Complete Census contains 61656118 cells."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"close-the-census","dir":"Articles","previous_headings":"Full Census metadata stats","what":"Close the census","title":"Summarizing cell and gene metadata","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. 
Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Chan Zuckerberg Initiative. Author, maintainer, copyright holder, funder.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Chan Zuckerberg Initiative (2023). cellxgene.census: CZ CELLxGENE Discover Cell Census. R package version 0.0.0.9000, https://github.com/chanzuckerberg/cellxgene-census.","code":"@Manual{,   title = {cellxgene.census: CZ CELLxGENE Discover Cell Census},   author = {{Chan Zuckerberg Initiative}},   year = {2023},   note = {R package version 0.0.0.9000},   url = {https://github.com/chanzuckerberg/cellxgene-census}, }"},{"path":"/index.html","id":"r-package-of-cz-cellxgene-discover-census","dir":"","previous_headings":"","what":"CZ CELLxGENE Discover Cell Census","title":"CZ CELLxGENE Discover Cell Census","text":"documentation R package cellxgene.census part Census CZ CELLxGENE Discover. full details Census data capabilities please go main Census site. cellxgene.census provides API efficiently access cloud-hosted Census single-cell data R. just seconds users can access slice Census data using cell gene filters across hundreds single-cell datasets. Census data can fetched iterative fashion bigger--memory slices data, quickly exported basic R structures, well Seurat SingleCellExperiment objects downstream analysis.","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"CZ CELLxGENE Discover Cell Census","text":"R session, first install tiledb R-Universe, latest release CRAN yet available. 
install cellxgene.census R-Universe.","code":"install.packages(\"tiledb\", repos = \"https://tiledb-inc.r-universe.dev\") install.packages(\"cellxgene.census\", repos = \"https://chanzuckerberg.r-universe.dev\")"},{"path":"/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"CZ CELLxGENE Discover Cell Census","text":"Check vignettes “Articles” section navigation bar site. highly recommend following vignettes starting point: Querying fetching single-cell data cell/gene metadata Learning CZ CELLxGENE Discover Census can also check quick start guide main Census site.","code":""},{"path":"/index.html","id":"example-seurat-and-singlecellexperiment-query","dir":"","previous_headings":"Usage","what":"Example Seurat and SingleCellExperiment query","title":"CZ CELLxGENE Discover Cell Census","text":"following creates Seurat object -demand sympathetic neurons Census filtering genes ENSG00000161798, ENSG00000188229. following retrieves data SingleCellExperiment object.","code":"library(\"cellxgene.census\") library(\"Seurat\")  census = open_soma()  organism = \"Homo sapiens\" gene_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" cell_filter =  \"cell_type == 'sympathetic neuron'\" cell_columns = c(\"assay\", \"cell_type\", \"tissue\", \"tissue_general\", \"suspension_type\", \"disease\")  seurat_obj = get_seurat(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns ) library(\"SingleCellExperiment\")  sce_obj = get_single_cell_experiment(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns )"},{"path":"/index.html","id":"for-more-help","dir":"","previous_headings":"","what":"For More Help","title":"CZ CELLxGENE Discover Cell Census","text":"help, please go visit main Census site. believe found security issue, appreciate notification. 
Please send email security@chanzuckerberg.com.","code":""},{"path":"/reference/download_source_h5ad.html","id":null,"dir":"Reference","previous_headings":"","what":"Download source H5AD to local file name. — download_source_h5ad","title":"Download source H5AD to local file name. — download_source_h5ad","text":"Download source H5AD local file name.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(   dataset_id,   file,   overwrite = FALSE,   census_version = \"stable\",   census = NULL )"},{"path":"/reference/download_source_h5ad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download source H5AD to local file name. — download_source_h5ad","text":"dataset_id dataset_id interest. file Local file name store H5AD file. overwrite TRUE allow overwriting existing file. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling download_source_h5ad() multiple times.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download source H5AD to local file name. 
— download_source_h5ad","text":"","code":"download_source_h5ad(\"0895c838-e550-48a3-a777-dbcd35d30272\", \"/tmp/data.h5ad\", overwrite = TRUE)"},{"path":"/reference/get_census_version_description.html","id":null,"dir":"Reference","previous_headings":"","what":"Get release description for a Census version — get_census_version_description","title":"Get release description for a Census version — get_census_version_description","text":"Get release description Census version","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get release description for a Census version — get_census_version_description","text":"","code":"get_census_version_description(census_version)"},{"path":"/reference/get_census_version_description.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get release description for a Census version — get_census_version_description","text":"census_version census version name.","code":""},{"path":"/reference/get_census_version_description.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get release description for a Census version — get_census_version_description","text":"List release location metadata","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get release description for a Census version — get_census_version_description","text":"","code":"as.data.frame(get_census_version_description(\"stable\")) #>   release_date release_build #> 1                 2023-07-25 #>                                                  soma.uri soma.s3_region #> 1 s3://cellxgene-data-public/cell-census/2023-07-25/soma/      us-west-2 #>                                                  h5ads.uri h5ads.s3_region #> 1 s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/       us-west-2 #>   
do_not_delete  alias census_version #> 1          TRUE stable         stable"},{"path":"/reference/get_census_version_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the directory of Census releases currently available — get_census_version_directory","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Get directory Census releases currently available","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory()"},{"path":"/reference/get_census_version_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Data frame available cell census releases, including location metadata.","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory() #>            release_date release_build #> stable                     2023-07-25 #> latest                     2023-08-01 #> 2023-05-15                 2023-05-15 #> 2023-07-03                 2023-07-03 #> 2023-07-10                 2023-07-10 #> 2023-07-17                 2023-07-17 #> 2023-07-24                 2023-07-24 #> 2023-07-25                 2023-07-25 #> 2023-08-01                 2023-08-01 #>                                                           soma.uri #> stable     s3://cellxgene-data-public/cell-census/2023-07-25/soma/ #> latest     s3://cellxgene-data-public/cell-census/2023-08-01/soma/ #> 2023-05-15 
s3://cellxgene-data-public/cell-census/2023-05-15/soma/ #> 2023-07-03 s3://cellxgene-data-public/cell-census/2023-07-03/soma/ #> 2023-07-10 s3://cellxgene-data-public/cell-census/2023-07-10/soma/ #> 2023-07-17 s3://cellxgene-data-public/cell-census/2023-07-17/soma/ #> 2023-07-24 s3://cellxgene-data-public/cell-census/2023-07-24/soma/ #> 2023-07-25 s3://cellxgene-data-public/cell-census/2023-07-25/soma/ #> 2023-08-01 s3://cellxgene-data-public/cell-census/2023-08-01/soma/ #>            soma.s3_region #> stable          us-west-2 #> latest          us-west-2 #> 2023-05-15      us-west-2 #> 2023-07-03      us-west-2 #> 2023-07-10      us-west-2 #> 2023-07-17      us-west-2 #> 2023-07-24      us-west-2 #> 2023-07-25      us-west-2 #> 2023-08-01      us-west-2 #>                                                           h5ads.uri #> stable     s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/ #> latest     s3://cellxgene-data-public/cell-census/2023-08-01/h5ads/ #> 2023-05-15 s3://cellxgene-data-public/cell-census/2023-05-15/h5ads/ #> 2023-07-03 s3://cellxgene-data-public/cell-census/2023-07-03/h5ads/ #> 2023-07-10 s3://cellxgene-data-public/cell-census/2023-07-10/h5ads/ #> 2023-07-17 s3://cellxgene-data-public/cell-census/2023-07-17/h5ads/ #> 2023-07-24 s3://cellxgene-data-public/cell-census/2023-07-24/h5ads/ #> 2023-07-25 s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/ #> 2023-08-01 s3://cellxgene-data-public/cell-census/2023-08-01/h5ads/ #>            h5ads.s3_region do_not_delete  alias #> stable           us-west-2          TRUE stable #> latest           us-west-2         FALSE latest #> 2023-05-15       us-west-2          TRUE        #> 2023-07-03       us-west-2            NA        #> 2023-07-10       us-west-2            NA        #> 2023-07-17       us-west-2            NA        #> 2023-07-24       us-west-2            NA        #> 2023-07-25       us-west-2          TRUE        #> 2023-08-01       us-west-2         
FALSE"},{"path":"/reference/get_presence_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Read the feature dataset presence matrix. — get_presence_matrix","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"Read feature dataset presence matrix.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"get_presence_matrix(census, organism, measurement_name = \"RNA\")"},{"path":"/reference/get_presence_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"census census object cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA.","code":""},{"path":"/reference/get_presence_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"tiledbsoma::matrixZeroBasedView object dataset join id & feature join id dimensions, filled 1s indicating presence. sparse matrix accessed zero-based indexes since join id's may zero.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. on.exit(census$close(), add = TRUE) print(get_presence_matrix(census, \"Homo sapiens\")$dim()) #> Error in private$check_open_for_read_or_write(): Item must be open for read or write. 
s3://cellxgene-data-public/cell-census/2023-07-25/soma/"},{"path":"/reference/get_seurat.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to Seurat — get_seurat","title":"Export Census slices to Seurat — get_seurat","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return Seurat object.","code":""},{"path":"/reference/get_seurat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to Seurat — get_seurat","text":"","code":"get_seurat(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\", data = NULL),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_seurat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to Seurat — get_seurat","text":"census census object, usually returned cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers named character X layers add Seurat assay, names names Seurat slots (counts data) values names layers within X. obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. 
var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_seurat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to Seurat — get_seurat","text":"Seurat object containing sensus slice.","code":""},{"path":"/reference/get_seurat.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to Seurat — get_seurat","text":"","code":"if (FALSE) { census <- open_soma() seurat_obj <- get_seurat(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  seurat_obj  census$close() }"},{"path":"/reference/get_single_cell_experiment.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return SingleCellExperiment object.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"get_single_cell_experiment(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\"),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_single_cell_experiment.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"census census object, usually returned cellxgene.census::open_soma(). 
organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers character vector X layers add assays main experiment; may optionally named set name resulting assay (eg. ‘X_layers = c(counts = \"raw\")’ load X layer “‘raw’” assay “‘counts’”); default, loads X layers obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"SingleCellExperiment object containing sensus slice.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"if (FALSE) { census <- open_soma() sce_obj <- get_single_cell_experiment(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  sce_obj  census$close() }"},{"path":"/reference/get_source_h5ad_uri.html","id":null,"dir":"Reference","previous_headings":"","what":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","title":"Locate source h5ad file for a dataset. 
— get_source_h5ad_uri","text":"Locate source h5ad file dataset.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(dataset_id, census_version = \"stable\", census = NULL)"},{"path":"/reference/get_source_h5ad_uri.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"dataset_id dataset_id interest. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling get_source_h5ad_uri() multiple times.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"list uri optional s3_region.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(\"0895c838-e550-48a3-a777-dbcd35d30272\") #> $uri #> [1] \"s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/0895c838-e550-48a3-a777-dbcd35d30272.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\" #>"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":null,"dir":"Reference","previous_headings":"","what":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"Create SOMATileDBContext suitable using open_soma(). 
Typically open_soma() creates context automatically, one can created separately order set custom configuration options, share multiple open Census handles.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"new_SOMATileDBContext_for_census(census_version_description, ...)"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"census_version_description result get_census_version_description() desired Census version. ... Custom configuration options.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"SOMATileDBContext object open_soma().","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"census_desc <- get_census_version_description(\"stable\") ctx <- new_SOMATileDBContext_for_census(census_desc, \"soma.init_buffer_bytes\" = paste(4 * 1024**3)) census <- open_soma(\"stable\", tiledbsoma_ctx = ctx) #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
census$close()"},{"path":"/reference/open_soma.html","id":null,"dir":"Reference","previous_headings":"","what":"Open the Census — open_soma","title":"Open the Census — open_soma","text":"Open Census","code":""},{"path":"/reference/open_soma.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Open the Census — open_soma","text":"","code":"open_soma(census_version = \"stable\", uri = NULL, tiledbsoma_ctx = NULL)"},{"path":"/reference/open_soma.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Open the Census — open_soma","text":"census_version version Census, e.g., \"stable\". uri URI containing Census SOMA objects open instead released version. (supplied, takes precedence census_version.) tiledbsoma_ctx tiledbsoma::SOMATileDBContext built using new_SOMATileDBContext_for_census(). Optional (created automatically) using census_version context need reused.","code":""},{"path":"/reference/open_soma.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Open the Census — open_soma","text":"Top-level tiledbsoma::SOMACollection object. use, census closed release memory resources, usually .exit(census$close(), add = TRUE). Closing top-level census also close SOMA objects accessed .","code":""},{"path":"/reference/open_soma.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Open the Census — open_soma","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #> # A tibble: 7 × 3 #>   soma_joinid label                      value      #>         <int> <chr>                      <chr>      #> 1           0 census_schema_version      1.0.0      #> 2           1 census_build_date          2023-07-25 #> 3           2 dataset_schema_version     3.0.0      #> 4           3 total_cell_count           61656118   #> 5           4 unique_cell_count          37447773   #> 6           5 number_donors_homo_sapiens 13035      #> 7           6 number_donors_mus_musculus 1417       census$close()"}]
+[{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 Chan Zuckerberg Initiative Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"/articles/census_dataset_presence.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Genes measured in each cell (dataset presence matrix)","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version).","code":"library(\"cellxgene.census\") census <- open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-ids-of-the-census-datasets","dir":"Articles","previous_headings":"","what":"Fetching the IDs of the Census datasets","title":"Genes measured in each cell (dataset presence matrix)","text":"Let’s grab table datasets included Census use table combination presence matrix .","code":"# Grab the experiment containing human data, and the measurement therein with RNA human <- census$get(\"census_data\")$get(\"homo_sapiens\") human_rna <- human$ms$get(\"RNA\")  # The census-wide datasets datasets_df <- as.data.frame(census$get(\"census_info\")$get(\"datasets\")$read()$concat()) print(datasets_df) #> # A tibble: 593 × 8 #>    soma_joinid collection_id     collection_name collection_doi dataset_id dataset_title #>          <int> <chr>             <chr>           <chr>          <chr>      <chr>         #>  1           0 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… f171db61-… donor_p13_tr… #>  2           1 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… ecf2e08e-… All donors t… #>  3           2 e2c257e7-6f79-48… Spatial multio… 10.1038/s4158… 74cff64f-… All donors a… #>  4           3 f7cecffa-00b4-45… Mapping single… 10.1016/j.cce… 5af90777-… Single-cell … #>  5           4 3f50314f-bdc9-40… Single-cell se… 10.1016/j.cce… bd65a70f-… Single-cell … #>  6           5 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… ff45e623-… Tabula Sapie… #>  7           6 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… f01bdd17-… Tabula Sapie… #>  8           7 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e6a11140-… Tabula Sapie… #>  9           8 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e5c63d94-… Tabula Sapie… #> 10           9 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… d8732da6-… Tabula Sapie… #> # ℹ 583 more rows #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count 
<int>"},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-dataset-presence-matrix","dir":"Articles","previous_headings":"","what":"Fetching the dataset presence matrix","title":"Genes measured in each cell (dataset presence matrix)","text":"Now let’s fetch dataset presence matrix. convenience, read entire presence matrix (Homo sapiens) sparse matrix. convenience function providing capability: also need var dataframe, read R data frame convenient manipulation:","code":"presence_matrix <- get_presence_matrix(census, \"Homo sapiens\", \"RNA\") print(dim(presence_matrix)) #> NULL var_df <- as.data.frame(human_rna$var$read()$concat()) print(var_df) #> # A tibble: 60,664 × 4 #>    soma_joinid feature_id      feature_name feature_length #>          <int> <chr>           <chr>                 <int> #>  1           0 ENSG00000121410 A1BG                   3999 #>  2           1 ENSG00000268895 A1BG-AS1               3374 #>  3           2 ENSG00000148584 A1CF                   9603 #>  4           3 ENSG00000175899 A2M                    6318 #>  5           4 ENSG00000245105 A2M-AS1                2948 #>  6           5 ENSG00000166535 A2ML1                  7156 #>  7           6 ENSG00000256661 A2ML1-AS1               452 #>  8           7 ENSG00000184389 A3GALT2                1023 #>  9           8 ENSG00000128274 A4GALT                 3358 #> 10           9 ENSG00000118017 A4GNT                  1779 #> # ℹ 60,654 more rows"},{"path":"/articles/census_dataset_presence.html","id":"identifying-genes-measured-in-a-specific-dataset","dir":"Articles","previous_headings":"","what":"Identifying genes measured in a specific dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Now dataset table, genes metadata table, dataset presence matrix, can check gene set genes measured specific dataset. Important: presence matrix indexed soma_joinid, positionally indexed. 
words: first dimension presence matrix dataset’s soma_joinid, stored census_datasets dataframe. second dimension presence matrix feature’s soma_joinid, stored var dataframe. presence matrix method $take() lets slice soma_joinids census_datasets var. full presence matrix, slices , can exported regular matrix method $get_one_based_matrix() Let’s find gene \"ENSG00000286096\" measured dataset id \"97a17473-e2b1-4f31-a544-44a60773e2dd\".","code":"# Get soma_joinid for datasets and genes of interest var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"] dataset_joinid <- datasets_df$soma_joinid[datasets_df$dataset_id == \"97a17473-e2b1-4f31-a544-44a60773e2dd\"]  # Slice presence matrix with datasets and genes of interest presence_matrix_slice <- presence_matrix$take(i = dataset_joinid, j = var_joinid)  # Convert presence matrix to regular matrix presence_matrix_slice <- presence_matrix_slice$get_one_based_matrix()  # Find how if the gene is present in this dataset is_present <- presence_matrix_slice[, , drop = TRUE] cat(paste(\"Feature is\", if (is_present) \"present.\" else \"not present.\")) #> Feature is present."},{"path":"/articles/census_dataset_presence.html","id":"identifying-datasets-that-measured-specific-genes","dir":"Articles","previous_headings":"","what":"Identifying datasets that measured specific genes","title":"Genes measured in each cell (dataset presence matrix)","text":"Similarly, can determine datasets measured specific gene set genes.","code":"# Grab the feature's soma_joinid from the var dataframe var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"]  # The presence matrix is indexed by the joinids of the dataset and var dataframes, # so slice out the feature of interest by its joinid. 
presence_matrix_slice  <- presence_matrix$take(j = var_joinid)$get_one_based_matrix() measured_datasets <- presence_matrix_slice[, , drop = TRUE] != 0 dataset_joinids <- datasets_df$soma_joinid[measured_datasets]  # From the datasets dataframe, slice out the datasets which have a joinid in the list print(datasets_df[dataset_joinids, ]) #> # A tibble: 42 × 8 #>    soma_joinid collection_id     collection_name collection_doi dataset_id dataset_title #>          <int> <chr>             <chr>           <chr>          <chr>      <chr>         #>  1           4 3f50314f-bdc9-40… Single-cell se… 10.1016/j.cce… bd65a70f-… Single-cell … #>  2           5 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… ff45e623-… Tabula Sapie… #>  3           6 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… f01bdd17-… Tabula Sapie… #>  4           7 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e6a11140-… Tabula Sapie… #>  5           8 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… e5c63d94-… Tabula Sapie… #>  6          10 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… d77ec7d6-… Tabula Sapie… #>  7          11 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… cee11228-… Tabula Sapie… #>  8          13 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… a2d4d33e-… Tabula Sapie… #>  9          14 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… a0754256-… Tabula Sapie… #> 10          18 e5f58829-1a66-40… Tabula Sapiens  10.1126/scien… 6d41668c-… Tabula Sapie… #> # ℹ 32 more rows #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count <int>"},{"path":"/articles/census_dataset_presence.html","id":"identifying-all-genes-measured-in-a-dataset","dir":"Articles","previous_headings":"","what":"Identifying all genes measured in a dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Finally, can find set genes measured cells given dataset.","code":"# Slice the dataset(s) of interest, and get the joinid(s) dataset_joinids <- 
datasets_df$soma_joinid[datasets_df$collection_id == \"17481d16-ee44-49e5-bcf0-28c0780d8c4a\"]  # Slice the presence matrix by the first dimension, i.e., by dataset presence_matrix_slice <- presence_matrix$take(i = dataset_joinids)$get_one_based_matrix() genes_measured <- Matrix::colSums(presence_matrix_slice) > 0 var_joinids <- var_df$soma_joinid[genes_measured]  print(var_df[var_joinids, ]) #> # A tibble: 27,210 × 4 #>    soma_joinid feature_id      feature_name feature_length #>          <int> <chr>           <chr>                 <int> #>  1           0 ENSG00000121410 A1BG                   3999 #>  2           1 ENSG00000268895 A1BG-AS1               3374 #>  3           2 ENSG00000148584 A1CF                   9603 #>  4           3 ENSG00000175899 A2M                    6318 #>  5           4 ENSG00000245105 A2M-AS1                2948 #>  6           5 ENSG00000166535 A2ML1                  7156 #>  7           7 ENSG00000184389 A3GALT2                1023 #>  8           8 ENSG00000128274 A4GALT                 3358 #>  9           9 ENSG00000118017 A4GNT                  1779 #> 10          10 ENSG00000094914 AAAS                   4727 #> # ℹ 27,200 more rows"},{"path":"/articles/census_dataset_presence.html","id":"close-the-census","dir":"Articles","previous_headings":"Identifying all genes measured in a dataset","what":"Close the census","title":"Genes measured in each cell (dataset presence matrix)","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_datasets.html","id":"fetching-the-datasets-table","dir":"Articles","previous_headings":"","what":"Fetching the datasets table","title":"Census Datasets example","text":"Census contains top-level data frame itemizing datasets contained therein. 
can read SOMADataFrame Arrow Table: R data frame: sum cell counts across datasets match number cells across SOMA experiments (human, mouse).","code":"library(\"cellxgene.census\") census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. census_datasets <- census$get(\"census_info\")$get(\"datasets\")$read()$concat() print(census_datasets) #> Table #> 593 rows x 8 columns #> $soma_joinid <int64 not null> #> $collection_id <large_string not null> #> $collection_name <large_string not null> #> $collection_doi <large_string not null> #> $dataset_id <large_string not null> #> $dataset_title <large_string not null> #> $dataset_h5ad_path <large_string not null> #> $dataset_total_cell_count <int64 not null> census_datasets <- as.data.frame(census_datasets) print(census_datasets[, c(   \"dataset_id\",   \"dataset_title\",   \"dataset_total_cell_count\" )]) #> # A tibble: 593 × 3 #>    dataset_id                           dataset_title             dataset_total_cell_c…¹ #>    <chr>                                <chr>                                      <int> #>  1 f171db61-e57e-4535-a06a-35d8b6ef8f2b donor_p13_trophoblasts                     31497 #>  2 ecf2e08e-2032-4a9e-b466-b65b395f4a02 All donors trophoblasts                    67070 #>  3 74cff64f-9da9-4b2a-9b3b-8a04a1598040 All donors all cell stat…                 286326 #>  4 5af90777-6760-4003-9dba-8f945fec6fdf Single-cell transcriptom…                 270855 #>  5 bd65a70f-b274-4133-b9dd-0d1431b6af34 Single-cell sequencing l…                 167283 #>  6 ff45e623-7f5f-46e3-b47d-56be0341f66b Tabula Sapiens - Pancreas                  13497 #>  7 f01bdd17-4902-40f5-86e3-240d66dd2587 Tabula Sapiens - Salivar…                  27199 #>  8 e6a11140-2545-46bc-929e-da243eed2cae Tabula Sapiens - Heart                     11505 #>  9 e5c63d94-593c-4338-a489-e1048599e751 Tabula Sapiens - Bladder            
       24583 #> 10 d8732da6-8d1d-42d9-b625-f2416c30054b Tabula Sapiens - Trachea                    9522 #> # ℹ 583 more rows #> # ℹ abbreviated name: ¹​dataset_total_cell_count census_data <- census$get(\"census_data\") all_experiments <- lapply(census_data$to_list(), function(x) census_data$get(x$name)) print(all_experiments) #> $mus_musculus #> <SOMAExperiment> #>   uri: s3://cellxgene-data-public/cell-census/2023-07-25/soma/census_data/mus_musculus  #>   arrays: obs*  #>   groups: ms*  #>  #> $homo_sapiens #> <SOMAExperiment> #>   uri: s3://cellxgene-data-public/cell-census/2023-07-25/soma/census_data/homo_sapiens  #>   arrays: obs*  #>   groups: ms* experiments_total_cells <- sum(sapply(all_experiments, function(x) {   nrow(x$obs$read(column_names = c(\"soma_joinid\"))$concat()) }))  print(paste(\"Found\", experiments_total_cells, \"cells in all experiments.\")) #> [1] \"Found 61656118 cells in all experiments.\" print(paste(   \"Found\", sum(as.vector(census_datasets$dataset_total_cell_count)),   \"cells in all datasets.\" )) #> [1] \"Found 61656118 cells in all datasets.\""},{"path":"/articles/census_datasets.html","id":"fetching-the-expression-data-from-a-single-dataset","dir":"Articles","previous_headings":"","what":"Fetching the expression data from a single dataset","title":"Census Datasets example","text":"Let’s pick one dataset slice census, turn Seurat -memory object. (requires Seurat package installed beforehand.) 
Create query mouse experiment, “RNA” measurement, dataset_id.","code":"census_datasets[census_datasets$dataset_id == \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\", ] #> # A tibble: 1 × 8 #>   soma_joinid collection_id      collection_name collection_doi dataset_id dataset_title #>         <int> <chr>              <chr>           <chr>          <chr>      <chr>         #> 1         522 0b9d8a04-bb9d-44d… Tabula Muris S… 10.1038/s4158… 0bd1a1de-… Bone marrow … #> # ℹ 2 more variables: dataset_h5ad_path <chr>, dataset_total_cell_count <int> library(\"tiledbsoma\") obs_query <- SOMAAxisQuery$new(   value_filter = \"dataset_id == '0bd1a1de-3aee-40e0-b2ec-86c7a30c7149'\" ) expt_query <- census_data$get(\"mus_musculus\")$axis_query(   measurement_name = \"RNA\",   obs_query = obs_query ) dataset_seurat <- expt_query$to_seurat(c(counts = \"raw\")) #> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package, #> which was just loaded, will retire in October 2023. #> Please refer to R-spatial evolution reports for details, especially #> https://r-spatial.org/r/2023/05/15/evolution4.html. #> It may be desirable to make the sf package available; #> package maintainers should consider adding sf to Suggests:. #> The sp package is now running under evolution status 2 #>      (status 2 uses the sf package in place of rgdal) print(dataset_seurat) #> An object of class Seurat  #> 52392 features across 40220 samples within 1 assay  #> Active assay: RNA (52392 features, 0 variable features)"},{"path":"/articles/census_datasets.html","id":"downloading-the-original-source-h5ad-file-of-a-dataset","dir":"Articles","previous_headings":"","what":"Downloading the original source H5AD file of a dataset","title":"Census Datasets example","text":"can use cellxgene.census::get_source_h5ad_uri() API fetch URI pointing H5AD associated dataset_id. H5AD can download CZ CELLxGENE Discover, may contain additional data-submitter provided information included Census. 
can fetch location cloud directly download system. local H5AD file can used R using SeuratDisk’s anndata converter.","code":"# Option 1: Direct download download_source_h5ad(   dataset_id = \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\",   file = \"/tmp/Tabula_Muris_Senis-bone_marrow.h5ad\",   overwrite = TRUE ) # Option 2: Get location and download via preferred method get_source_h5ad_uri(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\") #> $uri #> [1] \"s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\""},{"path":"/articles/census_datasets.html","id":"close-the-census","dir":"Articles","previous_headings":"Downloading the original source H5AD file of a dataset","what":"Close the census","title":"Census Datasets example","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_query_extract.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma() #> The stable Census release is currently 2023-07-25. 
Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/census_query_extract.html","id":"querying-cell-metadata-obs","dir":"Articles","previous_headings":"","what":"Querying cell metadata (obs)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census, RNA assays, located census$get(\"census_data\")$get(\"homo_sapiens\")$obs. SOMADataFrame can materialized R data frame (tibble) using .data.frame(obs$read()$concat()). mouse cell metadata census$get(\"census_data\")$get(\"mus_musculus\").obs. slicing cell metadata two relevant arguments can passed read(): column_names — character vector indicating metadata columns fetch. Expressions one comparisons Comparisons one <column> <op> <value> <column> <op> <column> Expressions can combine comparisons using && || op one < | > | <= | >= | == | != %% learn metadata columns available fetching filtering can directly look keys cell metadata. soma_joinid special SOMADataFrame column used join operations. definition columns can found Census schema. can used fetch specific columns specific rows matching condition. latter need know values looking priori. example let’s see possible values available sex. can load cell metadata fetching column sex. can see three different values sex, \"male\", \"female\" \"unknown\". information can fetch cell metatadata specific sex value, example \"unknown\". can use column_names value_filter perform specific queries. 
example let’s fetch disease column cell_type \"B cell\" tissue_general \"lung\".","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\" unique(as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = \"sex\")$concat())) #> # A tibble: 3 × 1 #>   sex     #>   <chr>   #> 1 unknown #> 2 female  #> 3 male as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(value_filter = \"sex == 'unknown'\")$concat()) #> # A tibble: 3,251,334 × 21 #>    soma_joinid dataset_id  assay assay_ontology_term_id cell_type cell_type_ontology_t…¹ #>          <int> <chr>       <chr> <chr>                  <chr>     <chr>                  #>  1           0 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  2           1 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #>  3           2 
f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  4           3 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  5           4 f171db61-e… 10x … EFO:0009922            extravil… CL:0008036             #>  6           5 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #>  7           6 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #>  8           7 f171db61-e… 10x … EFO:0009922            extravil… CL:0008036             #>  9           8 f171db61-e… 10x … EFO:0009922            placenta… CL:2000060             #> 10           9 f171db61-e… 10x … EFO:0009922            syncytio… CL:0000525             #> # ℹ 3,251,324 more rows #> # ℹ abbreviated name: ¹​cell_type_ontology_term_id #> # ℹ 15 more variables: development_stage <chr>, #> #   development_stage_ontology_term_id <chr>, disease <chr>, #> #   disease_ontology_term_id <chr>, donor_id <chr>, is_primary_data <lgl>, #> #   self_reported_ethnicity <chr>, self_reported_ethnicity_ontology_term_id <chr>, #> #   sex <chr>, sex_ontology_term_id <chr>, suspension_type <chr>, tissue <chr>, … cell_metadata_b_cell <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   value_filter = \"cell_type == 'B cell' & tissue_general == 'lung'\",   column_names = \"disease\" )  cell_metadata_b_cell <- as.data.frame(cell_metadata_b_cell$concat())  table(cell_metadata_b_cell) #> disease #> chronic obstructive pulmonary disease                              COVID-19  #>                                  6369                                  2729  #>          hypersensitivity pneumonitis             interstitial lung disease  #>                                    52                                   376  #>                   lung adenocarcinoma             lung large cell carcinoma  #>                                 62351                                  1534  #>              lymphangioleiomyomatosis         non-small 
cell lung carcinoma  #>                                   133                                 17484  #>   non-specific interstitial pneumonia                                normal  #>                                   231                                 25461  #>                 pleomorphic carcinoma                             pneumonia  #>                                  1210                                    50  #>                   pulmonary emphysema                    pulmonary fibrosis  #>                                  1512                                  6798  #>                 pulmonary sarcoidosis             small cell lung carcinoma  #>                                     6                                   583  #>          squamous cell lung carcinoma  #>                                 11920"},{"path":"/articles/census_query_extract.html","id":"querying-gene-metadata-var","dir":"Articles","previous_headings":"","what":"Querying gene metadata (var)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census located census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var. Similarly cell metadata, SOMADataFrame thus can also use method read(). mouse gene metadata census$get(\"census_data\")$get(\"mus_musculus\")$ms$get(\"RNA\")$var. Let’s take look metadata available column selection row filtering. exception soma_joinid columns defined Census schema. Similarly cell metadata, can use operations learn fetch gene metadata. 
example, get feature_name feature_length genes \"ENSG00000161798\" \"ENSG00000188229\" can following.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\" var_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$read(   value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   column_names = c(\"feature_name\", \"feature_length\") )  as.data.frame(var_df$concat()) #> # A tibble: 2 × 2 #>   feature_name feature_length #>   <chr>                 <int> #> 1 AQP5                   1884 #> 2 TUBB4B                 2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-seurat","dir":"Articles","previous_headings":"","what":"Querying expression data as Seurat","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"convenient way query fetch expression data use get_seurat method cellxgene.census API. method combines column selection value filtering described obtain slices expression data based metadata queries. method return Seurat object, takes input census object, string organism, cell gene metadata can specify filters column selection described following arguments: obs_column_names — character vector indicating columns select cell metadata. obs_value_filter — expression selection conditions fetch cells meeting criteria. var_column_names — character vector indicating columns select gene metadata. var_value_filter — expression selection conditions fetch genes meeting criteria. example want fetch expression data : Genes \"ENSG00000161798\" \"ENSG00000188229\". \"B cells\" \"lung\" \"COVID-19\". gene metadata adding sex cell metadata. 
full description refer ?cellxgene.census::get_seurat.","code":"library(\"Seurat\")  seurat_obj <- get_seurat(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) seurat_obj #> An object of class Seurat  #> 2 features across 2729 samples within 1 assay  #> Active assay: RNA (2 features, 0 variable features) head(seurat_obj[[]]) #>                orig.ident nCount_RNA nFeature_RNA cell_type tissue_general  disease #> cell5946018 SeuratProject          0            0    B cell           lung COVID-19 #> cell5948526 SeuratProject          1            1    B cell           lung COVID-19 #> cell5949180 SeuratProject          0            0    B cell           lung COVID-19 #> cell5949686 SeuratProject          0            0    B cell           lung COVID-19 #> cell5950730 SeuratProject          0            0    B cell           lung COVID-19 #> cell5952197 SeuratProject          0            0    B cell           lung COVID-19 #>                 sex #> cell5946018    male #> cell5948526 unknown #> cell5949180    male #> cell5949686 unknown #> cell5950730    male #> cell5952197 unknown head(seurat_obj$RNA[[]]) #>                 feature_name feature_length #> ENSG00000161798         AQP5           1884 #> ENSG00000188229       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-singlecellexperiment","dir":"Articles","previous_headings":"","what":"Querying expression data as SingleCellExperiment","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"Similarly previous section, get_single_cell_experiment method cellxgene.census API. behaves exactly get_seurat returns SingleCellExperiment object. example, repeat query can simply following. 
full description refer ?cellxgene.census::get_single_cell_experiment.","code":"library(\"SingleCellExperiment\")  sce_obj <- get_single_cell_experiment(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) sce_obj #> class: SingleCellExperiment  #> dim: 2 2729  #> metadata(0): #> assays(1): counts #> rownames(2): ENSG00000161798 ENSG00000188229 #> rowData names(2): feature_name feature_length #> colnames(2729): obs5946018 obs5948526 ... obs48038140 obs48038164 #> colData names(4): cell_type tissue_general disease sex #> reducedDimNames(0): #> mainExpName: RNA #> altExpNames(0): head(colData(sce_obj)) #> DataFrame with 6 rows and 4 columns #>              cell_type tissue_general     disease         sex #>            <character>    <character> <character> <character> #> obs5946018      B cell           lung    COVID-19        male #> obs5948526      B cell           lung    COVID-19     unknown #> obs5949180      B cell           lung    COVID-19        male #> obs5949686      B cell           lung    COVID-19     unknown #> obs5950730      B cell           lung    COVID-19        male #> obs5952197      B cell           lung    COVID-19     unknown head(rowData(sce_obj)) #> DataFrame with 2 rows and 2 columns #>                 feature_name feature_length #>                  <character>      <integer> #> ENSG00000161798         AQP5           1884 #> ENSG00000188229       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"close-the-census","dir":"Articles","previous_headings":"Querying expression data as SingleCellExperiment","what":"Close the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"use, census object closed release memory resources. 
also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/comp_bio_census_info.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Learning about the CZ CELLxGENE Census","text":"cellxgene.census R package contains convenient open_soma() API open version Census (stable default). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/comp_bio_census_info.html","id":"census-organization","dir":"Articles","previous_headings":"","what":"Census organization","title":"Learning about the CZ CELLxGENE Census","text":"Census schema defines structure Census. short, can think Census structured collection items stores different pieces information. items parent collection SOMA objects various types can accessed TileDB-SOMA API (documentation). cellxgene.census package contains convenient wrappers TileDB-SOMA API. example function used open Census: cellxgene_census.open_soma().","code":""},{"path":"/articles/comp_bio_census_info.html","id":"main-census-components","dir":"Articles","previous_headings":"Census organization","what":"Main Census components","title":"Learning about the CZ CELLxGENE Census","text":"command created census, SOMACollection, R6 class providing key-value associative map. 
get() method can access two top-level collection members, census_info census_data, instances SOMACollection.","code":""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-info","dir":"Articles","previous_headings":"Census organization","what":"Census summary info","title":"Learning about the CZ CELLxGENE Census","text":"census$get(\"census_info\")$get(\"summary\"): data frame high-level information Census, e.g. build date, total cell count, etc. census$get(\"census_info\")$get(\"datasets\"): data frame datasets CELLxGENE Discover used create Census. census$get(\"census_info\")$get(\"summary_cell_counts\"): data frame cell counts stratified relevant cell metadata Census data Data organism stored independent SOMAExperiment objects specialized form SOMACollection. store data matrix (cell genes), cell metadata, gene metadata, useful components covered notebook. data organized one organism – Homo sapiens: census$get(\"census_data\")$get(\"homo_sapiens\")$obs: Cell metadata census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\"): Data matrices, currently … census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$X$get(\"raw\"): matrix raw counts SOMASparseNDArray census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var: Gene Metadata","code":""},{"path":"/articles/comp_bio_census_info.html","id":"cell-metadata","dir":"Articles","previous_headings":"","what":"Cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"can obtain cell metadata variables directly querying columns corresponding SOMADataFrame. variables can used querying Census case want work specific cells. variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. dataset_id: dataset id encoded census$get(\"census_info\")$get(\"datasets\"). 
tissue_general tissue_general_ontology_term_id: high-level tissue mapping.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\""},{"path":"/articles/comp_bio_census_info.html","id":"gene-metadata","dir":"Articles","previous_headings":"","what":"Gene metadata","title":"Learning about the CZ CELLxGENE Census","text":"Similarly, can obtain gene metadata variables directly querying columns corresponding SOMADataFrame. variables can use querying Census case specific genes interested . variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. 
feature_length: length base pairs gene.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-content-tables","dir":"Articles","previous_headings":"","what":"Census summary content tables","title":"Learning about the CZ CELLxGENE Census","text":"can take quick look high-level Census information looking census$get(\"census_info\")$get(\"summary\"): special interest label-value combinations : total_cell_count total number cells Census. unique_cell_count number unique cells, cells may present twice due meta-analysis consortia-like data. number_donors_homo_sapiens number_donors_mus_musculus number individuals human mouse. guaranteed unique one individual ID may present identical different datasets.","code":"as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #> # A tibble: 7 × 3 #>   soma_joinid label                      value      #>         <int> <chr>                      <chr>      #> 1           0 census_schema_version      1.0.0      #> 2           1 census_build_date          2023-07-25 #> 3           2 dataset_schema_version     3.0.0      #> 4           3 total_cell_count           61656118   #> 5           4 unique_cell_count          37447773   #> 6           5 number_donors_homo_sapiens 13035      #> 7           6 number_donors_mus_musculus 1417"},{"path":"/articles/comp_bio_census_info.html","id":"cell-counts-by-cell-metadata","dir":"Articles","previous_headings":"Census summary content tables","what":"Cell counts by cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"looking census$get(\"census_info)$get(\"summary_cell_counts\") can get general idea cell counts stratified relevant cell metadata. cell metadata included table, can take look cell gene metadata available sections “Cell metadata” “Gene metadata”. 
line retrieves table casts R data frame: combination organism values category cell metadata can take look total_cell_count unique_cell_count cell counts combination. values category specified ontology_term_id label, value’s IDs labels, respectively.","code":"census_counts <- as.data.frame(census$get(\"census_info\")$get(\"summary_cell_counts\")$read()$concat()) head(census_counts) #> # A tibble: 6 × 7 #>   soma_joinid organism     category ontology_term_id unique_cell_count total_cell_count #>         <int> <chr>        <chr>    <chr>                        <int>            <int> #> 1           0 Homo sapiens all      na                        33364242         56400873 #> 2           1 Homo sapiens assay    EFO:0008722                 264166           279635 #> 3           2 Homo sapiens assay    EFO:0008780                  25652            51304 #> 4           3 Homo sapiens assay    EFO:0008919                  89477           206754 #> 5           4 Homo sapiens assay    EFO:0008931                  78750           188248 #> 6           5 Homo sapiens assay    EFO:0008953                   4693             9386 #> # ℹ 1 more variable: label <chr>"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-metadata-included-in-the-summary-counts-table","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell metadata included in the summary counts table","title":"Learning about the CZ CELLxGENE Census","text":"get available cell metadata summary counts table can following. 
Remember cell metadata available, variables omitted creation table.","code":"t(table(census_counts$organism, census_counts$category)) #>                           #>                           Homo sapiens Mus musculus #>   all                                1            1 #>   assay                             19            9 #>   cell_type                        613          248 #>   disease                           64            5 #>   self_reported_ethnicity           26            1 #>   sex                                3            3 #>   suspension_type                    1            1 #>   tissue                           220           66 #>   tissue_general                    54           27"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-counts-for-each-sequencing-assay-in-human-data","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell counts for each sequencing assay in human data","title":"Learning about the CZ CELLxGENE Census","text":"get cell counts sequencing assay type human data, can perform following operations:","code":"human_assay_counts <- census_counts[census_counts$organism == \"Homo sapiens\" & census_counts$category == \"assay\", ] human_assay_counts <- human_assay_counts[order(human_assay_counts$total_cell_count, decreasing = TRUE), ]"},{"path":"/articles/comp_bio_census_info.html","id":"example-number-of-microglial-cells-in-the-census","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: number of microglial cells in the Census","title":"Learning about the CZ CELLxGENE Census","text":"specific term categories shown can directly find number cells term.","code":"census_counts[census_counts$label == \"microglial cell\", ] #> # A tibble: 2 × 7 #>   soma_joinid organism     category  ontology_term_id unique_cell_count total_cell_count #>         <int> <chr>        <chr>     <chr>           
             <int>            <int> #> 1          69 Homo sapiens cell_type CL:0000129                  268114           370771 #> 2        1038 Mus musculus cell_type CL:0000129                   48998            62617 #> # ℹ 1 more variable: label <chr>"},{"path":"/articles/comp_bio_census_info.html","id":"understanding-census-contents-beyond-the-summary-tables","dir":"Articles","previous_headings":"","what":"Understanding Census contents beyond the summary tables","title":"Learning about the CZ CELLxGENE Census","text":"using pre-computed tables census$get(\"census_info\") easy quick way understand contents Census, falls short want learn certain slices Census. example, may want learn : cell types available human liver? total number cells lung datasets stratified sequencing technology? sex distribution cells brain mouse? diseases available T cells? questions can answered directly querying cell metadata shown examples .","code":""},{"path":"/articles/comp_bio_census_info.html","id":"example-all-cell-types-available-in-human","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: all cell types available in human","title":"Learning about the CZ CELLxGENE Census","text":"exemplify process accessing slicing cell metadata summary stats, let’s start trivial example take look human cell types available Census: number rows total number cells humans. Now, wish get cell counts per cell type can work data frame. addition, focus cells marked is_primary_data=TRUE ensures de-duplicate cells appear CELLxGENE Discover. number unique cells. Now let’s look counts per cell type: shows abundant cell types “glutamatergic neuron”, “CD8-positive, alpha-beta T cell”, “CD4-positive, alpha-beta T cell”. Now let’s take look number unique cell types: total number different cell types human. information example can quickly obtained summary table census$get(\"census-info\")$get(\"summary_cell_counts\"). 
examples complex can achieved accessing cell metadata.","code":"obs_df <-   census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = c(\"cell_type\", \"is_primary_data\")) as.data.frame(obs_df$concat()) #> # A tibble: 56,400,873 × 2 #>    cell_type                     is_primary_data #>    <chr>                         <lgl>           #>  1 syncytiotrophoblast cell      FALSE           #>  2 placental villous trophoblast FALSE           #>  3 syncytiotrophoblast cell      FALSE           #>  4 syncytiotrophoblast cell      FALSE           #>  5 extravillous trophoblast      FALSE           #>  6 placental villous trophoblast FALSE           #>  7 syncytiotrophoblast cell      FALSE           #>  8 extravillous trophoblast      FALSE           #>  9 placental villous trophoblast FALSE           #> 10 syncytiotrophoblast cell      FALSE           #> # ℹ 56,400,863 more rows obs_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = \"cell_type\",   value_filter = \"is_primary_data == TRUE\" )  obs_df <- as.data.frame(obs_df$concat()) nrow(obs_df) #> [1] 33364242 human_cell_type_counts <- table(obs_df$cell_type) sort(human_cell_type_counts, decreasing = TRUE)[1:10] #>  #>                          neuron            glutamatergic neuron  #>                         2673669                         1541605  #> CD4-positive, alpha-beta T cell CD8-positive, alpha-beta T cell  #>                         1258976                         1235987  #>              classical monocyte                          B cell  #>                         1030996                          908651  #>                     native cell             natural killer cell  #>                          889262                          768755  #>                      macrophage                 oligodendrocyte  #>                          721687                          710242 length(human_cell_type_counts) #> [1] 
599"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-types-available-in-human-liver","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: cell types available in human liver","title":"Learning about the CZ CELLxGENE Census","text":"Similar example , can learn cell types available specific tissue, e.g. liver. achieve goal just need limit cell metadata tissue. use information cell metadata variable tissue_general. variable contains high-level tissue label cells Census: cell types cell counts human liver.","code":"obs_liver_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = \"cell_type\",   value_filter = \"is_primary_data == TRUE & tissue_general == 'liver'\" )  obs_liver_df <- as.data.frame(obs_liver_df$concat())  sort(table(obs_liver_df$cell_type), decreasing = TRUE)[1:10] #>  #>                          T cell                     hepatoblast  #>                           85739                           58447  #>                 neoplastic cell                    erythroblast  #>                           52431                           45605  #>                        monocyte                      hepatocyte  #>                           31388                           28309  #>             natural killer cell    periportal region hepatocyte  #>                           26871                           23509  #>                      macrophage centrilobular region hepatocyte  #>                           16707                           15819"},{"path":"/articles/comp_bio_census_info.html","id":"example-diseased-t-cells-in-human-tissues","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: diseased T cells in human tissues","title":"Learning about the CZ CELLxGENE Census","text":"example going get counts diseased cells annotated T cells. 
sake example focus “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”: cell counts annotated indicated disease across human tissues “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”.","code":"obs_t_cells_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = c(\"disease\", \"tissue_general\"),   value_filter = \"is_primary_data == TRUE & disease != 'normal' & cell_type %in% c('CD8-positive, alpha-beta T cell', 'CD4-positive, alpha-beta T cell')\" )  obs_t_cells_df <- as.data.frame(obs_t_cells_df$concat())  print(table(obs_t_cells_df)) #>                                        tissue_general #> disease                                 adipose tissue adrenal gland  blood blood clot #>   B-cell non-Hodgkin lymphoma                        0             0  62499          0 #>   breast cancer                                      0             0      0          0 #>   chronic obstructive pulmonary disease              0             0      0          0 #>   chronic rhinitis                                   0             0      0          0 #>   clear cell renal carcinoma                         0             0   6548          0 #>   COVID-19                                           0             0 819428          0 #>   Crohn disease                                      0             0      0          0 #>   cystic fibrosis                                    0             0      0          0 #>   Down syndrome                                      0             0      0          0 #>   follicular lymphoma                                0             0      0          0 #>   influenza                                          0             0   8871          0 #>   interstitial lung disease                          0             0      0          0 #>   kidney benign neoplasm                             0             0     20          0 #>   kidney oncocytoma                                  0             0     16 
         0 #>   lung adenocarcinoma                                0           205      0          0 #>                                        tissue_general #> disease                                 bone marrow  brain breast  colon kidney  liver #>   B-cell non-Hodgkin lymphoma                     0      0      0      0      0      0 #>   breast cancer                                   0      0   1850      0      0      0 #>   chronic obstructive pulmonary disease           0      0      0      0      0      0 #>   chronic rhinitis                                0      0      0      0      0      0 #>   clear cell renal carcinoma                      0      0      0      0  20540      0 #>   COVID-19                                        0      0      0      0      0      0 #>   Crohn disease                                   0      0      0  17490      0      0 #>   cystic fibrosis                                 0      0      0      0      0      0 #>   Down syndrome                                 181      0      0      0      0      0 #>   follicular lymphoma                             0      0      0      0      0      0 #>   influenza                                       0      0      0      0      0      0 #>   interstitial lung disease                       0      0      0      0      0      0 #>   kidney benign neoplasm                          0      0      0      0     10      0 #>   kidney oncocytoma                               0      0      0      0   2408      0 #>   lung adenocarcinoma                             0   3274      0      0      0    507 #>                                        tissue_general #> disease                                   lung lymph node   nose pleural fluid #>   B-cell non-Hodgkin lymphoma                0          0      0             0 #>   breast cancer                              0          0      0             0 #>   chronic obstructive pulmonary disease   9382          0      0             0 #>   chronic 
rhinitis                           0          0    909             0 #>   clear cell renal carcinoma                 0         36      0             0 #>   COVID-19                               30578          0     13             0 #>   Crohn disease                              0          0      0             0 #>   cystic fibrosis                            7          0      0             0 #>   Down syndrome                              0          0      0             0 #>   follicular lymphoma                        0       1089      0             0 #>   influenza                                  0          0      0             0 #>   interstitial lung disease               1803          0      0             0 #>   kidney benign neoplasm                     0          0      0             0 #>   kidney oncocytoma                          0          0      0             0 #>   lung adenocarcinoma                   215013      24969      0         11558 #>                                        tissue_general #> disease                                 respiratory system saliva small intestine #>   B-cell non-Hodgkin lymphoma                            0      0               0 #>   breast cancer                                          0      0               0 #>   chronic obstructive pulmonary disease                  0      0               0 #>   chronic rhinitis                                       0      0               0 #>   clear cell renal carcinoma                             0      0               0 #>   COVID-19                                               4     41               0 #>   Crohn disease                                          0      0           52029 #>   cystic fibrosis                                        0      0               0 #>   Down syndrome                                          0      0               0 #>   follicular lymphoma                                    0      0               0 #>   influenza                       
                       0      0               0 #>   interstitial lung disease                              0      0               0 #>   kidney benign neoplasm                                 0      0               0 #>   kidney oncocytoma                                      0      0               0 #>   lung adenocarcinoma                                    0      0               0 #>  [ reached getOption(\"max.print\") -- omitted 10 rows ]"},{"path":"/articles/comp_bio_data_integration.html","id":"finding-and-fetching-data-from-mouse-liver-10x-genomics-and-smart-seq2","dir":"Articles","previous_headings":"","what":"Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s load modules needed notebook. Now can open Census. notebook use Tabula Muris Senis data liver contains cells 10X Genomics Smart-Seq2 technologies. Let’s query datasets table Census filtering collection_name “Tabula Muris Senis” dataset_title “liver”. Now can use values dataset_id query load AnnData object cells datasets. can check cell counts 10X Genomics Smart-Seq2 data looking assay metadata.","code":"library(cellxgene.census) library(Seurat) library(patchwork) census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
census_datasets <- census$get(\"census_info\")$get(\"datasets\") census_datasets <- census_datasets$read(value_filter = \"collection_name == 'Tabula Muris Senis'\") census_datasets <- as.data.frame(census_datasets$concat())  # Print rows with liver data census_datasets[grep(\"Liver\", census_datasets$dataset_title), ] #> # A tibble: 2 × 8 #>   soma_joinid collection_id            collection_name collection_doi dataset_id #>         <int> <chr>                    <chr>           <chr>          <chr>      #> 1         525 0b9d8a04-bb9d-44da-aa27… Tabula Muris S… 10.1038/s4158… 4546e757-… #> 2         547 0b9d8a04-bb9d-44da-aa27… Tabula Muris S… 10.1038/s4158… 6202a243-… #> # ℹ 3 more variables: dataset_title <chr>, dataset_h5ad_path <chr>, #> #   dataset_total_cell_count <int> tabula_muris_liver_ids <- c(\"4546e757-34d0-4d17-be06-538318925fcd\", \"6202a243-b713-4e12-9ced-c387f8483dea\")  seurat_obj <- get_seurat(   census,   organism = \"Mus musculus\",   obs_value_filter = \"dataset_id %in% tabula_muris_liver_ids\" ) table(seurat_obj$assay) #>  #>  10x 3' v2 Smart-seq2  #>       7294       2859"},{"path":"/articles/comp_bio_data_integration.html","id":"gene-length-normalization-of-smart-seq2-data-","dir":"Articles","previous_headings":"","what":"Gene-length normalization of Smart-Seq2 data.","title":"Integrating multi-dataset slices of data with Seurat","text":"Smart-seq2 read counts normalized gene length. Lets first get gene lengths var.feature_length. Now can use normalize Smart-seq data. let’s split object assay. 
normalize Smart-seq slice using gene lengths merge back single object.","code":"smart_seq_gene_lengths <- seurat_obj$RNA[[]]$feature_length seurat_obj.list <- SplitObject(seurat_obj, split.by = \"assay\") seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts <- seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts / smart_seq_gene_lengths seurat_obj <- merge(seurat_obj.list[[1]], seurat_obj.list[[2]])"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-with-seurat","dir":"Articles","previous_headings":"","what":"Integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"use native integration capabilities Seurat. comprehensive usage best practices Seurat intergation please refer doc site Seurat.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"inspecting-data-prior-to-integration","dir":"Articles","previous_headings":"Integration with Seurat","what":"Inspecting data prior to integration","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s take look strength batch effects data. perform embedding visualization via UMAP. Let’s basic data normalization variable gene selection now perform PCA UMAP   can see batch effects strong cells cluster primarily assay cell_type. 
Properly integrated embedding principle cluster primarily cell_type, assay best randomly distributed.","code":"seurat_obj <- SCTransform(seurat_obj) seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = \"vst\", nfeatures = 2000) seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj)) seurat_obj <- RunUMAP(seurat_obj, dims = 1:30) # By assay p1 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"data-integration-with-seurat","dir":"Articles","previous_headings":"Integration with Seurat","what":"Data integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"Whenever query fetch Census data multiple datasets integration needs performed evidenced batch effects observed. paramaters Seurat used notebook selected model run quickly. best practices integration single-cell data using Seurat please refer documentation page. seurat_d reading article integrated cell atlas human lung health disease Sikkema et al. perfomed integration 43 datasets Lung. focus metadata Census can batch information integration.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id","title":"Integrating multi-dataset slices of data with Seurat","text":"cells Census annotated dataset come \"dataset_id\". great place start integration. let’s run Seurat integration pipeline. First define model batch set dataset_id. Firs normalize select variable genes seperated batch key dataset_id Now perform integration. Let’s inspect results normalization UMAP visulization. plot UMAP.   Great! can see clustering longer mainly driven assay, albeit still contributing . 
Great! can see clustering longer mainly driven assay, albeit still contributing .","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- ScaleData(seurat_obj.combined, verbose = FALSE) seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id","title":"Integrating multi-dataset slices of data with Seurat","text":"Similar dataset_id, cells Census annotated donor_id. definition donor_id depends dataset left discretion data curators. However still rich information can used batch variable integration. 
donor_id guaranteed unique across cells Census, strongly recommend concatenating dataset_id donor_id use batch separator Seurat Now perform integration. inspect new results UMAP. Plot UMAP.   can see using dataset_id donor_id batch cells now mostly cluster cell type.","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = \"donor_id\") })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") #> Finding all pairwise anchors #> Running CCA #> Merging objects #> Finding neighborhoods #> Finding anchors #>  Found 7026 anchors #> Filtering anchors #>  Retained 4880 anchors seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") #> Merging dataset 1 into 2 #> Extracting anchors for merged samples #> Finding integration vectors #> Finding integration vector weights #> Integrating data DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) #> 12:40:57 UMAP embedding parameters a = 0.9922 b = 1.112 #> 12:40:57 Read 10153 rows and found 30 numeric columns #> 12:40:57 Using Annoy for neighbor search, n_neighbors = 30 #> 12:40:57 Building Annoy index with metric = cosine, n_trees = 50 #> 0%   10   20   30   40   50   60   70   80   90   100% #> 
[----|----|----|----|----|----|----|----|----|----| #> **************************************************| #> 12:40:59 Writing NN index file to temp file /var/folders/9q/j_1zpqns7vz_s5n9l6_b943r0000gr/T//Rtmpg3DKZz/file41847a77c42b #> 12:40:59 Searching Annoy index using 1 thread, search_k = 3000 #> 12:41:02 Annoy recall = 100% #> 12:41:02 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 #> 12:41:03 Initializing from normalized Laplacian + noise (using irlba) #> 12:41:03 Commencing optimization for 200 epochs, with 410804 positive edges #> 12:41:09 Optimization finished # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id-assay_ontology_term_id-suspension_type-","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id + assay_ontology_term_id + suspension_type.","title":"Integrating multi-dataset slices of data with Seurat","text":"cases one dataset may contain multiple assay types /multiple suspension types (cell vs nucleus), important consider metadata batches. Therefore, comprehensive definition batch Census can accomplished combining cell metadata dataset_id, donor_id, assay_ontology_term_id suspension_type, latter encode EFO ids assay types. example, two datasets used contain cells one assay , one suspension type . Thus make difference include metadata part batch. implementation look line","code":"# EXAMPLE, DON'T RUN.  
# split the dataset into a list of seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = c(\"donor_id\", \"assay_ontology_term_id\", \"suspension_type\")) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list)  # integrate seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\")"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Summarizing cell and gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). open Census, close census$close(). can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma(). can learn cellxgene.census methods accessing corresponding documentation. example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"summarizing-cell-metadata","dir":"Articles","previous_headings":"","what":"Summarizing cell metadata","title":"Summarizing cell and gene metadata","text":"Census open can use TileDB-SOMA methods SOMACollection. can thus access metadata SOMADataFrame objects encoding cell gene metadata. 
Tips: can read entire SOMADataFrame R using .data.frame(soma_df$read()$concat()). Queries much faster request DataFrame columns required analysis (e.g. column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\")). can also refine query results using value_filter, filter census matching records.","code":""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-all-cell-types","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize all cell types","title":"Summarizing cell and gene metadata","text":"example reads cell metadata (obs) R data frame summarize variety ways.","code":"human <- census$get(\"census_data\")$get(\"homo_sapiens\")  # Read obs into an R data frame (tibble). obs_df <- human$obs$read(column_names = c(\"cell_type\")) obs_df <- as.data.frame(obs_df$concat())  # Find all unique values in the cell_type column. unique_cell_type <- unique(obs_df$cell_type)  cat(   \"There are\",   length(unique_cell_type),   \"cell types in the Census! The first few are: \",   paste(head(unique_cell_type), collapse = \", \") ) #> There are 613 cell types in the Census! The first few are:  syncytiotrophoblast cell, placental villous trophoblast, extravillous trophoblast, trophoblast giant cell, decidual natural killer cell, human, T cell"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-a-subset-of-cell-types-selected-with-a-value_filter","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize a subset of cell types, selected with a value_filter","title":"Summarizing cell and gene metadata","text":"example utilizes SOMA “value filter” read subset cells tissue_ontology_term_id equal UBERON:0002048 (lung tissue), summarizes query result. can also define much complex value filters. 
example: combine terms & | use %% operator query multiple values","code":"# Read cell_type terms for cells which have a specific tissue term LUNG_TISSUE <- \"UBERON:0002048\"  obs_df <- human$obs$read(column_names = c(\"cell_type\"), value_filter = paste0(\"tissue_ontology_term_id == '\", LUNG_TISSUE, \"'\")) obs_df <- as.data.frame(obs_df$concat())  # Find all unique values in the cell_type column as an R data frame. unique_cell_type <- unique(obs_df$cell_type) cat(   \"There are \",   length(unique_cell_type),   \" cell types in the Census where tissue_ontology_term_id == \",   LUNG_TISSUE,   \"!\\nThe first few are:\",   paste(head(unique_cell_type), collapse = \", \"),   \"\\n\" ) #> There are  185  cell types in the Census where tissue_ontology_term_id ==  UBERON:0002048 ! #> The first few are: type II pneumocyte, neutrophil, effector CD4-positive, alpha-beta T cell, effector CD8-positive, alpha-beta T cell, mature NK T cell, blood vessel endothelial cell  # Report the 10 most common top_10 <- sort(table(obs_df$cell_type), decreasing = TRUE)[1:10] cat(   \"The top 10 cell types where tissue_ontology_term_id ==\",    LUNG_TISSUE,   \"are: \",   paste(names(top_10), collapse = \", \") ) #> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048 are:  native cell, alveolar macrophage, CD8-positive, alpha-beta T cell, CD4-positive, alpha-beta T cell, macrophage, type II pneumocyte, classical monocyte, natural killer cell, malignant cell, epithelial cell of lower respiratory tract # You can also do more complex queries, such as testing for inclusion in a list of values obs_df <- human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = \"tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:OOO2084', 'UBERON:0002080')\" )  obs_df <- as.data.frame(obs_df$concat())  # Summarize top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] print(top_10) #>  #> CL:0000746 CL:0008034 CL:0002548 CL:0000115 
CL:0002131 CL:0000763 CL:0000669 CL:0000003  #>     159096      84750      79618      64190      61830      32088      27515      22707  #> CL:0000057 CL:0002144  #>      20117      18593"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"full-census-metadata-stats","dir":"Articles","previous_headings":"","what":"Full Census metadata stats","title":"Summarizing cell and gene metadata","text":"example queries organisms Census, summarizes diversity various metadata labels.","code":"cols_to_query <- c(   \"cell_type_ontology_term_id\",   \"assay_ontology_term_id\",   \"tissue_ontology_term_id\" )  total_cells <- 0 for (organism in census$get(\"census_data\")$names()) {   print(organism)      obs_df <- census$get(\"census_data\")$get(organism)$obs$read(column_names = cols_to_query)   obs_df <- as.data.frame(obs_df$concat())      total_cells <- total_cells + nrow(obs_df)   for (col in cols_to_query) {     cat(\"  Unique \", col, \" values: \", length(unique(obs_df[[col]])), \"\\n\")   } } #> [1] \"mus_musculus\" #>   Unique  cell_type_ontology_term_id  values:  248  #>   Unique  assay_ontology_term_id  values:  9  #>   Unique  tissue_ontology_term_id  values:  66  #> [1] \"homo_sapiens\" #>   Unique  cell_type_ontology_term_id  values:  613  #>   Unique  assay_ontology_term_id  values:  19  #>   Unique  tissue_ontology_term_id  values:  220 cat(\"Complete Census contains \", total_cells, \" cells.\") #> Complete Census contains  61656118  cells."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"close-the-census","dir":"Articles","previous_headings":"Full Census metadata stats","what":"Close the census","title":"Summarizing cell and gene metadata","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. 
Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Chan Zuckerberg Initiative. Author, maintainer, copyright holder, funder.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Chan Zuckerberg Initiative (2023). cellxgene.census: CZ CELLxGENE Discover Cell Census. R package version 0.0.9.9, https://github.com/chanzuckerberg/cellxgene-census.","code":"@Manual{,   title = {cellxgene.census: CZ CELLxGENE Discover Cell Census},   author = {{Chan Zuckerberg Initiative}},   year = {2023},   note = {R package version 0.0.9.9},   url = {https://github.com/chanzuckerberg/cellxgene-census}, }"},{"path":"/index.html","id":"r-package-of-cz-cellxgene-discover-census","dir":"","previous_headings":"","what":"CZ CELLxGENE Discover Cell Census","title":"CZ CELLxGENE Discover Cell Census","text":"documentation R package cellxgene.census part CZ CELLxGENE Discover Census. full details Census data capabilities please go main Census site. cellxgene.census provides API efficiently access cloud-hosted Census single-cell data R. just seconds users can access slice Census data using cell gene filters across hundreds single-cell datasets. Census data can fetched iterative fashion bigger--memory slices data, quickly exported basic R structures, well Seurat SingleCellExperiment objects downstream analysis.","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"CZ CELLxGENE Discover Cell Census","text":"R session, first install tiledb R-Universe, latest release CRAN yet available. installing Ubuntu, may need install following libraries via apt install: libxml2-dev libssl-dev libcurl4-openssl-dev. installing MacOS, need install developer tools Xcode. 
install cellxgene.census R-Universe.","code":"install.packages(   \"tiledb\",   version = \"0.20.3\",    repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org')  ) install.packages(   \"cellxgene.census\",   repos=c('https://chanzuckerberg.r-universe.dev', 'https://cloud.r-project.org')  )"},{"path":"/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"CZ CELLxGENE Discover Cell Census","text":"Check vignettes “Articles” section navigation bar site. highly recommend following vignettes starting point: Querying fetching single-cell data cell/gene metadata Learning CZ CELLxGENE Discover Census can also check quick start guide main Census site.","code":""},{"path":"/index.html","id":"example-seurat-and-singlecellexperiment-query","dir":"","previous_headings":"Usage","what":"Example Seurat and SingleCellExperiment query","title":"CZ CELLxGENE Discover Cell Census","text":"following creates Seurat object -demand sympathetic neurons Census filtering genes ENSG00000161798, ENSG00000188229. 
following retrieves data SingleCellExperiment object.","code":"library(\"cellxgene.census\") library(\"Seurat\")  census <- open_soma()  organism <- \"Homo sapiens\" gene_filter <- \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" cell_filter <-  \"cell_type == 'sympathetic neuron'\" cell_columns <- c(\"assay\", \"cell_type\", \"tissue\", \"tissue_general\", \"suspension_type\", \"disease\")  seurat_obj <- get_seurat(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns ) library(\"SingleCellExperiment\")  sce_obj <- get_single_cell_experiment(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns )"},{"path":"/index.html","id":"for-more-help","dir":"","previous_headings":"","what":"For More Help","title":"CZ CELLxGENE Discover Cell Census","text":"help, please go visit main Census site. believe found security issue, appreciate notification. Please send email security@chanzuckerberg.com.","code":""},{"path":"/reference/download_source_h5ad.html","id":null,"dir":"Reference","previous_headings":"","what":"Download source H5AD to local file name. — download_source_h5ad","title":"Download source H5AD to local file name. — download_source_h5ad","text":"Download source H5AD local file name.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(   dataset_id,   file,   overwrite = FALSE,   census_version = \"stable\",   census = NULL )"},{"path":"/reference/download_source_h5ad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download source H5AD to local file name. — download_source_h5ad","text":"dataset_id dataset_id interest. 
file Local file name store H5AD file. overwrite TRUE allow overwriting existing file. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling download_source_h5ad() multiple times.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(\"0895c838-e550-48a3-a777-dbcd35d30272\", \"/tmp/data.h5ad\", overwrite = TRUE)"},{"path":"/reference/get_census_version_description.html","id":null,"dir":"Reference","previous_headings":"","what":"Get release description for a Census version — get_census_version_description","title":"Get release description for a Census version — get_census_version_description","text":"Get release description Census version","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get release description for a Census version — get_census_version_description","text":"","code":"get_census_version_description(census_version)"},{"path":"/reference/get_census_version_description.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get release description for a Census version — get_census_version_description","text":"census_version census version name.","code":""},{"path":"/reference/get_census_version_description.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get release description for a Census version — get_census_version_description","text":"List release location metadata","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get release description for a Census version — 
get_census_version_description","text":"","code":"as.data.frame(get_census_version_description(\"stable\")) #>   release_date release_build #> 1                 2023-07-25 #>                                                  soma.uri soma.s3_region #> 1 s3://cellxgene-data-public/cell-census/2023-07-25/soma/      us-west-2 #>                                                  h5ads.uri h5ads.s3_region #> 1 s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/       us-west-2 #>   do_not_delete  alias census_version #> 1          TRUE stable         stable"},{"path":"/reference/get_census_version_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the directory of Census releases currently available — get_census_version_directory","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Get directory Census releases currently available","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory()"},{"path":"/reference/get_census_version_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Data frame available cell census releases, including location metadata.","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory() #>            release_date release_build #> stable                     2023-07-25 #> latest                     2023-08-01 #> 2023-05-15                 2023-05-15 #> 2023-07-03                
 2023-07-03 #> 2023-07-10                 2023-07-10 #> 2023-07-17                 2023-07-17 #> 2023-07-24                 2023-07-24 #> 2023-07-25                 2023-07-25 #> 2023-08-01                 2023-08-01 #>                                                           soma.uri #> stable     s3://cellxgene-data-public/cell-census/2023-07-25/soma/ #> latest     s3://cellxgene-data-public/cell-census/2023-08-01/soma/ #> 2023-05-15 s3://cellxgene-data-public/cell-census/2023-05-15/soma/ #> 2023-07-03 s3://cellxgene-data-public/cell-census/2023-07-03/soma/ #> 2023-07-10 s3://cellxgene-data-public/cell-census/2023-07-10/soma/ #> 2023-07-17 s3://cellxgene-data-public/cell-census/2023-07-17/soma/ #> 2023-07-24 s3://cellxgene-data-public/cell-census/2023-07-24/soma/ #> 2023-07-25 s3://cellxgene-data-public/cell-census/2023-07-25/soma/ #> 2023-08-01 s3://cellxgene-data-public/cell-census/2023-08-01/soma/ #>            soma.s3_region #> stable          us-west-2 #> latest          us-west-2 #> 2023-05-15      us-west-2 #> 2023-07-03      us-west-2 #> 2023-07-10      us-west-2 #> 2023-07-17      us-west-2 #> 2023-07-24      us-west-2 #> 2023-07-25      us-west-2 #> 2023-08-01      us-west-2 #>                                                           h5ads.uri #> stable     s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/ #> latest     s3://cellxgene-data-public/cell-census/2023-08-01/h5ads/ #> 2023-05-15 s3://cellxgene-data-public/cell-census/2023-05-15/h5ads/ #> 2023-07-03 s3://cellxgene-data-public/cell-census/2023-07-03/h5ads/ #> 2023-07-10 s3://cellxgene-data-public/cell-census/2023-07-10/h5ads/ #> 2023-07-17 s3://cellxgene-data-public/cell-census/2023-07-17/h5ads/ #> 2023-07-24 s3://cellxgene-data-public/cell-census/2023-07-24/h5ads/ #> 2023-07-25 s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/ #> 2023-08-01 s3://cellxgene-data-public/cell-census/2023-08-01/h5ads/ #>            h5ads.s3_region do_not_delete  alias #> stable           us-west-2   
       TRUE stable #> latest           us-west-2         FALSE latest #> 2023-05-15       us-west-2          TRUE        #> 2023-07-03       us-west-2            NA        #> 2023-07-10       us-west-2            NA        #> 2023-07-17       us-west-2            NA        #> 2023-07-24       us-west-2            NA        #> 2023-07-25       us-west-2          TRUE        #> 2023-08-01       us-west-2         FALSE"},{"path":"/reference/get_presence_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Read the feature dataset presence matrix. — get_presence_matrix","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"Read feature dataset presence matrix.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"get_presence_matrix(census, organism, measurement_name = \"RNA\")"},{"path":"/reference/get_presence_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"census census object cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA.","code":""},{"path":"/reference/get_presence_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"tiledbsoma::matrixZeroBasedView object dataset join id & feature join id dimensions, filled 1s indicating presence. sparse matrix accessed zero-based indexes since join id's may zero.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read the feature dataset presence matrix. 
— get_presence_matrix","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. on.exit(census$close(), add = TRUE) print(get_presence_matrix(census, \"Homo sapiens\")$dim()) #> Error in private$check_open_for_read_or_write(): Item must be open for read or write. s3://cellxgene-data-public/cell-census/2023-07-25/soma/"},{"path":"/reference/get_seurat.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to Seurat — get_seurat","title":"Export Census slices to Seurat — get_seurat","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return Seurat object.","code":""},{"path":"/reference/get_seurat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to Seurat — get_seurat","text":"","code":"get_seurat(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\", data = NULL),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_seurat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to Seurat — get_seurat","text":"census census object, usually returned cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers named character X layers add Seurat assay, names names Seurat slots (counts data) values names layers within X. obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. 
var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_seurat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to Seurat — get_seurat","text":"Seurat object containing sensus slice.","code":""},{"path":"/reference/get_seurat.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to Seurat — get_seurat","text":"","code":"if (FALSE) { census <- open_soma() seurat_obj <- get_seurat(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  seurat_obj  census$close() }"},{"path":"/reference/get_single_cell_experiment.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return SingleCellExperiment object.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"get_single_cell_experiment(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\"),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_single_cell_experiment.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to SingleCellExperiment — 
get_single_cell_experiment","text":"census census object, usually returned cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers character vector X layers add assays main experiment; may optionally named set name resulting assay (eg. ‘X_layers = c(counts = \"raw\")’ load X layer “‘raw’” assay “‘counts’”); default, loads X layers obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"SingleCellExperiment object containing sensus slice.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"if (FALSE) { census <- open_soma() sce_obj <- get_single_cell_experiment(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  sce_obj  census$close() }"},{"path":"/reference/get_source_h5ad_uri.html","id":null,"dir":"Reference","previous_headings":"","what":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","title":"Locate source h5ad file for a dataset. 
— get_source_h5ad_uri","text":"Locate source h5ad file dataset.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(dataset_id, census_version = \"stable\", census = NULL)"},{"path":"/reference/get_source_h5ad_uri.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"dataset_id dataset_id interest. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling get_source_h5ad_uri() multiple times.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"list uri optional s3_region.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(\"0895c838-e550-48a3-a777-dbcd35d30272\") #> $uri #> [1] \"s3://cellxgene-data-public/cell-census/2023-07-25/h5ads/0895c838-e550-48a3-a777-dbcd35d30272.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\" #>"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":null,"dir":"Reference","previous_headings":"","what":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"Create SOMATileDBContext suitable using open_soma(). 
Typically open_soma() creates context automatically, one can created separately order set custom configuration options, share multiple open Census handles.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"new_SOMATileDBContext_for_census(census_version_description, ...)"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"census_version_description result get_census_version_description() desired Census version. ... Custom configuration options.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"SOMATileDBContext object open_soma().","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"census_desc <- get_census_version_description(\"stable\") ctx <- new_SOMATileDBContext_for_census(census_desc, \"soma.init_buffer_bytes\" = paste(4 * 1024**3)) census <- open_soma(\"stable\", tiledbsoma_ctx = ctx) #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
census$close()"},{"path":"/reference/open_soma.html","id":null,"dir":"Reference","previous_headings":"","what":"Open the Census — open_soma","title":"Open the Census — open_soma","text":"Open Census","code":""},{"path":"/reference/open_soma.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Open the Census — open_soma","text":"","code":"open_soma(census_version = \"stable\", uri = NULL, tiledbsoma_ctx = NULL)"},{"path":"/reference/open_soma.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Open the Census — open_soma","text":"census_version version Census, e.g., \"stable\". uri URI containing Census SOMA objects open instead released version. (supplied, takes precedence census_version.) tiledbsoma_ctx tiledbsoma::SOMATileDBContext built using new_SOMATileDBContext_for_census(). Optional (created automatically) using census_version context need reused.","code":""},{"path":"/reference/open_soma.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Open the Census — open_soma","text":"Top-level tiledbsoma::SOMACollection object. use, census closed release memory resources, usually .exit(census$close(), add = TRUE). Closing top-level census also close SOMA objects accessed .","code":""},{"path":"/reference/open_soma.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Open the Census — open_soma","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-07-25. Specify census_version = \"2023-07-25\" in future calls to open_soma() to ensure data consistency. 
as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #> # A tibble: 7 × 3 #>   soma_joinid label                      value      #>         <int> <chr>                      <chr>      #> 1           0 census_schema_version      1.0.0      #> 2           1 census_build_date          2023-07-25 #> 3           2 dataset_schema_version     3.0.0      #> 4           3 total_cell_count           61656118   #> 5           4 unique_cell_count          37447773   #> 6           5 number_donors_homo_sapiens 13035      #> 7           6 number_donors_mus_musculus 1417       census$close()"}]
diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index de1bf4099..7e3f0cdd4 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -31,12 +31,14 @@ If installing in a Databricks notebook environment, use `%pip install`. Do not u
 
 From an R session, first install `tiledb` from R-Universe, the latest release in CRAN is not yet available.
 
-If installing from Ubuntu, you may need to install the following libraries via `apt install`:  `libxml2-dev` `libssl-dev` `libcurl4-openssl-dev`
+If installing from Ubuntu, you may need to install the following libraries via `apt install`:  `libxml2-dev` `libssl-dev` `libcurl4-openssl-dev`.
+
+If installing from MacOS, you will need to install the [developer tools Xcode](https://apps.apple.com/us/app/xcode/id497799835?mt=12).
 
 ```r
 install.packages(
   "tiledb",
-  version = "0.20.2", 
+  version = "0.20.3", 
   repos=c('https://tiledb-inc.r-universe.dev','https://cloud.r-project.org') 
 )
 ```