update DE

MangiolaLaboratory · Dec 6, 2023 · 4eacd4b · 4eacd4b
1 parent 1bf2880
commit 4eacd4b
Show file tree

Hide file tree

Showing 5 changed files with 37 additions and 27 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -9,6 +9,7 @@ export(map2_test_differential_abundance_hpc)
 export(map_add_dispersion_to_se)
 export(map_split_sce_by_gene)
 export(map_split_se_by_gene)
+export(map_split_se_by_number_of_genes)
 export(map_test_differential_abundance)
 export(non_batch_variation_removal)
 export(preprocessing_output)
@@ -103,4 +104,3 @@ importFrom(tidyr,replace_na)
 importFrom(tidyr,unite)
 importFrom(tidyr,unnest)
 importFrom(tidyseurat,aggregate_cells)
-importFrom(utils,data)
diff --git a/R/functions.R b/R/functions.R
@@ -1196,6 +1196,33 @@ map_split_se_by_gene = function(se_df, .col, .number_of_chunks){
     mutate(se_md5 = ids::random_id(n()))
 }
 
+#' Split each element of a list-column into chunks of at most `chunk_size` rows
+#'
+#' @param se_df Data frame holding a list-column of objects that have rows.
+#' @param .col Unquoted name of that list-column.
+#' @param chunk_size Maximum number of rows per chunk; a single number >= 1.
+#' @return `se_df` with `.col` unnested after splitting, plus a random `se_md5` id.
+#' @export
+map_split_se_by_number_of_genes = function(se_df, .col, chunk_size = 100){
+  .col = enquo(.col)
+  stopifnot(is.numeric(chunk_size), length(chunk_size) == 1, chunk_size >= 1)
+  se_df |>
+    mutate(!!.col := map(
+      !!.col,
+      ~ {
+        num_chunks = ceiling(nrow(.x) / chunk_size)
+        # Consecutive rows share a chunk id; seq_len() avoids the 1:0 pitfall
+        grouping_factor =
+          rep(seq_len(num_chunks), each = chunk_size, length.out = nrow(.x)) |>
+          as.factor()
+        .x |> splitRowData(f = grouping_factor)
+      }
+    )) |>
+    unnest(!!.col) |>
+    mutate(se_md5 = ids::random_id(n()))
+}
+
+
 #' @importFrom digest digest
 #' @importFrom rlang enquo
 #'

diff --git a/R/targets_functions.R b/R/targets_functions.R
@@ -54,6 +54,7 @@ map2_test_differential_abundance_hpc = function(
   computing_resources |> saveRDS("temp_computing_resources.rds")
   debug_job_id |> saveRDS("temp_debug_job_id.rds")
   .abundance |> saveRDS("temp_abundance_column_name.rds")
+  data_list |> length() |> saveRDS("temp_number_of_datasets.rds")
 
 
   # Header
@@ -95,7 +96,8 @@ map2_test_differential_abundance_hpc = function(
         tar_target(file_data, "temp_data.rds", format = "file", deployment = "main"),
         tar_target(file_formula, "temp_formula.rds", format = "file", deployment = "main"),
         tar_target(abundance, readRDS("temp_abundance_column_name.rds"), deployment = "main"),
-        tar_target( number_of_workers, readRDS("temp_computing_resources.rds")$client$workers, deployment = "main" )
+        tar_target( number_of_workers, readRDS("temp_computing_resources.rds")$client$workers, deployment = "main" ),
+        tar_target( number_of_datasets, readRDS("temp_number_of_datasets.rds"), deployment = "main" )
       )
 
   }, glue("{store}.R"))
@@ -132,7 +134,7 @@ map2_test_differential_abundance_hpc = function(
         pseudobulk_df_tissue_split_by_gene, 
         pseudobulk_df_tissue_dispersion |> map_split_se_by_gene(
           data, 
-          number_of_workers
+          number_of_workers # / number_of_datasets
         ), 
 
         pattern = map(pseudobulk_df_tissue_dispersion),
@@ -152,7 +154,7 @@ map2_test_differential_abundance_hpc = function(
         pseudobulk_df_tissue_split_by_gene_grouped |>
 
           # transform back to formula because I converted to character before
-          mutate(formula = formula |> as.formula()) |> 
+          mutate(formula = map(formula, as.formula)) |> 
 
           map_test_differential_abundance(
             data,

diff --git a/R_scripts/de_parallel.R b/R_scripts/de_parallel.R
@@ -334,16 +334,16 @@ nest_detect_complete_confounder = function(.data, .col1, .col2){
 #     }
 #   )) |> 
 # mutate(data = map(data, tidybulk::identify_abundant, factor_of_interest = ethnicity_simplified )) |> 
-#
+# slice(1:22)
 # 
-# se_big |> saveRDS("R_scripts/se_big.rds")
+# se_big |> saveRDS("R_scripts/se_big.rds", compress = "xz")
 
 se_big = readRDS("~/PostDoc/HPCell/R_scripts/se_big.rds")
 
 tic()
 se_big |> 
   pull(data) %>%
-  .[[1]] |> 
+  .[[24]] |> 
   tidybulk::identify_abundant(factor_of_interest = ethnicity_simplified) |> 
   tidybulk::test_differential_abundance(
     ~ age_days * sex + ethnicity_simplified + assay_simplified + .aggregated_cells + (1 | file_id), 
@@ -362,16 +362,13 @@ slurm = crew.cluster::crew_controller_slurm(
 
 
 microbenchmark(
-  { set.seed(43); 
     x = 
     se_big |>
-    slice(1:10) |> 
     mutate(data = map2_test_differential_abundance_hpc(
       data,
       formula ,
       computing_resources = slurm
-    ))
-    }, 
+    )), 
   times = 1
 )
 

diff --git a/man/theme_multipanel.Rd b/man/theme_multipanel.Rd