-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrna_load.R
67 lines (46 loc) · 1.35 KB
/
scrna_load.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
library(tidyverse)
setwd("/ix/djishnu/Aaron")
library(reticulate)
reticulate::use_condaenv(condaenv = "analysis_env", required = T)
library(anndata)
library(Seurat)
library(SeuratDisk)
library(SeuratObject)
library(SeuratWrappers)
library(SingleCellExperiment)
sc = reticulate::import("scanpy")
data_path = "/ix/djishnu/Aaron/scrnaseq/covid19atlas/"
pbmc_data = "meyer_nikolic_covid_pbmc.cellxgene.20210813.h5ad"
pbmc = anndata::read_h5ad(paste0(data_path, pbmc_data))
sce = SingleCellExperiment(
assays = list(logcounts = t(pbmc$X)),
colData = pbmc$obs,
rowData = pbmc$var,
reducedDims = pbmc$obsm
)
# import genes of interest
genes = read_tsv("/ix/djishnu/Aaron/data/gene_list.tsv")
gene_locations = which(pbmc$var_names %in% genes$unique.results.distinct.gene_name.)
# subset genes
pbmc_desired = pbmc[,gene_locations]
anndata::write_h5ad(pbmc_desired, filename = "/ix/djishnu/Aaron/data/genelist_counts.h5ad")
# # get log counts
#
# lgc = logcounts(sce)
#
#
# # get rows with genes of interest
#
# gene_expr = lgc[gene_locations,]
#
#
#
# # conver to anndata
#
# exprs = assay(sce, "logcounts")
# col_data = as.data.frame(colData(sce))
# row_data = as.data.frame(rowData(sce))
# embedding = as.data.frame(reducedDims(sce))
#
# anndata_output = AnnData(X = exprs, obs = col_data, var = row_data,
# obsm = embedding)