Skip to content

Commit

Permalink
[python] Update scvi pipeline for the June LTS training (#1173)
Browse files: browse the repository at this point in the history
* Update scvi pipeline for the June LTS training

* parametrize census config
  • Loading branch information
ebezzi authored Jun 28, 2024
1 parent 443845d commit 8d1e103
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 6 deletions.
4 changes: 3 additions & 1 deletion tools/models/scvi/scvi-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ census:
null
obs_query_model: # Required when loading data for model training. Do not change.
'is_primary_data == True and nnz >= 300'
version:
"2024-05-20"
hvg:
top_n_hvg:
8000
Expand All @@ -19,7 +21,7 @@ model:
filename: "scvi.model"
n_hidden: 512
n_latent: 50
n_layers: 1
n_layers: 2
train:
max_epochs: 100
batch_size: 1024
Expand Down
7 changes: 4 additions & 3 deletions tools/models/scvi/scvi-create-latent-update.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@
with open(file) as f:
config = yaml.safe_load(f)

census = cellxgene_census.open_soma(census_version="2023-12-15")

census_config = config.get("census")
experiment_name = census_config.get("organism")
obs_value_filter = census_config.get("obs_query")

version = census_config.get("version")
census = cellxgene_census.open_soma(census_version=version)

hv = pd.read_pickle("hv_genes.pkl")
hv_idx = hv[hv].index

Expand All @@ -44,7 +45,7 @@

adata.var.set_index("feature_id", inplace=True)

idx = query.obs(column_names=["soma_joinid"]).concat().to_pandas().index.to_numpy()
idx = query.obs(column_names=["soma_joinid"]).concat().to_pandas().to_numpy()

del census, query, hv, hv_idx
gc.collect()
Expand Down
5 changes: 3 additions & 2 deletions tools/models/scvi/scvi-prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
with open(file) as f:
config = yaml.safe_load(f)

census = cellxgene_census.open_soma(census_version="2023-12-15")

census_config = config.get("census")
experiment_name = census_config.get("organism")
obs_query = census_config.get("obs_query")
obs_query_model = census_config.get("obs_query_model")

version = census_config.get("version")
census = cellxgene_census.open_soma(census_version=version)

if obs_query is None:
obs_value_filter = obs_query_model
else:
Expand Down

0 comments on commit 8d1e103

Please sign in to comment.