Skip to content

Commit

Permalink
update DE
Browse files Browse the repository at this point in the history
  • Loading branch information
stemangiola committed Dec 6, 2023
1 parent 1bf2880 commit 4eacd4b
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 27 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export(map2_test_differential_abundance_hpc)
export(map_add_dispersion_to_se)
export(map_split_sce_by_gene)
export(map_split_se_by_gene)
export(map_split_se_by_number_of_genes)
export(map_test_differential_abundance)
export(non_batch_variation_removal)
export(preprocessing_output)
Expand Down Expand Up @@ -103,4 +104,3 @@ importFrom(tidyr,replace_na)
importFrom(tidyr,unite)
importFrom(tidyr,unnest)
importFrom(tidyseurat,aggregate_cells)
importFrom(utils,data)
27 changes: 27 additions & 0 deletions R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,33 @@ map_split_se_by_gene = function(se_df, .col, .number_of_chunks){
mutate(se_md5 = ids::random_id(n()))
}

#' Split each object of a list-column into chunks of a fixed number of rows
#'
#' For every element of the list-column `.col`, consecutive rows are assigned
#' to chunks of at most `chunk_size` rows, and the element is split with
#' `splitRowData()` using that assignment as the grouping factor. The
#' list-column is then unnested and a random identifier is added as `se_md5`.
#'
#' @param se_df A data frame containing the list-column `.col`. Each element
#'   must support `nrow()` and `splitRowData()`.
#' @param .col Unquoted name of the list-column to split.
#' @param chunk_size Maximum number of rows per chunk; a single number
#'   greater than or equal to 1. Defaults to 100.
#'
#' @return `se_df` with `.col` unnested after splitting (presumably one chunk
#'   per row -- depends on what `splitRowData()` returns) and an additional
#'   `se_md5` column of random ids.
#'
#' @export
map_split_se_by_number_of_genes = function(se_df, .col, chunk_size = 100){

  .col = enquo(.col)

  # Fail early with a clear message: a zero, negative or missing chunk size
  # would otherwise surface as an obscure error from rep() inside map().
  stopifnot(
    is.numeric(chunk_size),
    length(chunk_size) == 1L,
    !is.na(chunk_size),
    chunk_size >= 1
  )

  se_df |>
    mutate(!!.col := map(
      !!.col,
      function(se) {
        total_rows = nrow(se)
        num_chunks = ceiling(total_rows / chunk_size)

        # Chunk id for every row: 1,1,...,2,2,... truncated to the row count.
        # seq_len() (rather than 1:num_chunks) stays empty when there are
        # no rows instead of producing c(1, 0).
        grouping_factor =
          rep(
            seq_len(num_chunks),
            each = chunk_size,
            length.out = total_rows
          ) |>
          as.factor()

        se |> splitRowData(f = grouping_factor)
      }
    )) |>
    unnest(!!.col) |>
    mutate(se_md5 = ids::random_id(n()))
}


#' @importFrom digest digest
#' @importFrom rlang enquo
#'
Expand Down
8 changes: 5 additions & 3 deletions R/targets_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ map2_test_differential_abundance_hpc = function(
computing_resources |> saveRDS("temp_computing_resources.rds")
debug_job_id |> saveRDS("temp_debug_job_id.rds")
.abundance |> saveRDS("temp_abundance_column_name.rds")
data_list |> length() |> saveRDS("temp_number_of_datasets.rds")


# Header
Expand Down Expand Up @@ -95,7 +96,8 @@ map2_test_differential_abundance_hpc = function(
tar_target(file_data, "temp_data.rds", format = "file", deployment = "main"),
tar_target(file_formula, "temp_formula.rds", format = "file", deployment = "main"),
tar_target(abundance, readRDS("temp_abundance_column_name.rds"), deployment = "main"),
tar_target( number_of_workers, readRDS("temp_computing_resources.rds")$client$workers, deployment = "main" )
tar_target( number_of_workers, readRDS("temp_computing_resources.rds")$client$workers, deployment = "main" ),
tar_target( number_of_datasets, readRDS("temp_number_of_datasets.rds"), deployment = "main" )
)

}, glue("{store}.R"))
Expand Down Expand Up @@ -132,7 +134,7 @@ map2_test_differential_abundance_hpc = function(
pseudobulk_df_tissue_split_by_gene,
pseudobulk_df_tissue_dispersion |> map_split_se_by_gene(
data,
number_of_workers
number_of_workers # / number_of_datasets
),

pattern = map(pseudobulk_df_tissue_dispersion),
Expand All @@ -152,7 +154,7 @@ map2_test_differential_abundance_hpc = function(
pseudobulk_df_tissue_split_by_gene_grouped |>

# transform back to formula because I converted to character before
mutate(formula = formula |> as.formula()) |>
mutate(formula = map(formula, as.formula)) |>

map_test_differential_abundance(
data,
Expand Down
11 changes: 4 additions & 7 deletions R_scripts/de_parallel.R
Original file line number Diff line number Diff line change
Expand Up @@ -334,16 +334,16 @@ nest_detect_complete_confounder = function(.data, .col1, .col2){
# }
# )) |>
# mutate(data = map(data, tidybulk::identify_abundant, factor_of_interest = ethnicity_simplified )) |>
#
# slice(1:22)
#
# se_big |> saveRDS("R_scripts/se_big.rds")
# se_big |> saveRDS("R_scripts/se_big.rds", compress = "xz")

se_big = readRDS("~/PostDoc/HPCell/R_scripts/se_big.rds")

tic()
se_big |>
pull(data) %>%
.[[1]] |>
.[[24]] |>
tidybulk::identify_abundant(factor_of_interest = ethnicity_simplified) |>
tidybulk::test_differential_abundance(
~ age_days * sex + ethnicity_simplified + assay_simplified + .aggregated_cells + (1 | file_id),
Expand All @@ -362,16 +362,13 @@ slurm = crew.cluster::crew_controller_slurm(


microbenchmark(
{ set.seed(43);
x =
se_big |>
slice(1:10) |>
mutate(data = map2_test_differential_abundance_hpc(
data,
formula ,
computing_resources = slurm
))
},
)),
times = 1
)

Expand Down
16 changes: 0 additions & 16 deletions man/theme_multipanel.Rd

This file was deleted.

0 comments on commit 4eacd4b

Please sign in to comment.