Optionally ignore invalid size factors during normalization.

This includes size factors of zero or non-finite size factors. By default, these options are disabled to fail fast in the presence of invalid size factors. Nonetheless, users can enforce a fail-free run by turning on these options; for example, this is done automatically by quickAdtSizeFactors.
kanaverse · Jul 25, 2023 · 5fb16a9 · 5fb16a9
1 parent 57ddaf8
commit 5fb16a9
Show file tree

Hide file tree

Showing 8 changed files with 39 additions and 12 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,6 @@
 # scran.js news
 
-## 3.0.0-alpha.1
+## 3.0.0-alpha.3
 
 **Changes**
 
@@ -27,6 +27,9 @@
     This avoids exposing the status objects for basic use cases.
 - Removed many of the `empty*Results()` functions, as these are not necessary for regular uses of this package.
 - Removed the `consume=` option from `initializeSparseMatrixFromRds()`, as the potential damage from pass-by-reference mutations is too high for the minor improvement in performance.
+- Added `allowNonFinite=` option to the `logNormCounts()` function to handle infinite and missing size factors.
+- Added `allowZeros=` and `allowNonFinite=` options to the `groupedSizeFactors()` function to handle zero, infinite and missing size factors.
+  Both options are automatically set to `true` when calling `groupedSizeFactors()` from the `quickAdtSizeFactors()` function.
 
 ## 2.1.8
 

diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt
@@ -77,7 +77,7 @@ FetchContent_MakeAvailable(umappp)
 FetchContent_Declare(
   scran 
   GIT_REPOSITORY https://github.com/LTLA/libscran
-  GIT_TAG da495dad607bed894cbcbb7ed811fa8d4983d576
+  GIT_TAG 8659e9c76bd7656b1d75051f26eb5aa08c935d9c
 )
 FetchContent_MakeAvailable(scran)
 

diff --git a/js/groupedSizeFactors.js b/js/groupedSizeFactors.js
@@ -13,6 +13,12 @@ import * as wa from "wasmarrays.js";
  * @param {object} [options={}] - Optional parameters.
  * @param {boolean} [options.center=true] - Whether to return centered size factors.
  * If `false`, the size factors can be interpreted as the scaling to match `reference`.
+ * @param {boolean} [options.allowZeros=false] - Whether size factors of zero should be allowed for the groups.
+ * If `true`, any size factors of zero are converted to the smallest non-zero size factor across all groups. 
+ * If `false`, an error is raised instead.
+ * @param {boolean} [options.allowNonFinite=false] - Whether non-finite size factors should be allowed for the groups.
+ * If `true`, size factors of infinity are converted to the largest finite size factor across all groups, and size factors of NaN are converted to 1.
+ * If `false`, an error is raised instead.
  * @param {?Float64WasmArray} [options.buffer=null] - Output buffer for the size factors.
  * This should have length equal to the number of columns in `x`.
  * @param {number} [options.priorCount=10] - Prior count to use for shrinking size factors towards the relative library size.
@@ -27,7 +33,7 @@ import * as wa from "wasmarrays.js";
  *
  * If `buffer` was supplied, it is used as the return value.
  */
-export function groupedSizeFactors(x, groups, { center = true, buffer = null, priorCount = 10, reference = null, numberOfThreads = null } = {}) {
+export function groupedSizeFactors(x, groups, { center = true, allowZeros = false, allowNonFinite = false, buffer = null, priorCount = 10, reference = null, numberOfThreads = null } = {}) {
     var local_buffer;
     var group_arr;
     let nthreads = utils.chooseNumberOfThreads(numberOfThreads);
@@ -45,7 +51,7 @@ export function groupedSizeFactors(x, groups, { center = true, buffer = null, pr
             reference = -1;
         }
 
-        wasm.call(module => module.grouped_size_factors(x.matrix, group_arr.offset, center, priorCount, reference, buffer.offset, nthreads));
+        wasm.call(module => module.grouped_size_factors(x.matrix, group_arr.offset, center, allowZeros, allowNonFinite, priorCount, reference, buffer.offset, nthreads));
 
     } catch (e) {
         utils.free(local_buffer);

diff --git a/js/logNormCounts.js b/js/logNormCounts.js
@@ -21,12 +21,15 @@ import * as wasm from "./wasm.js";
  *
  * This option is ignored if `center = false`, in which case it is assumed that scaling has already been provided in the input `sizeFactors`.
  * @param {boolean} [options.allowZeros=false] - Whether size factors of zero should be allowed.
- * If `true`, no scaling normalization is performed for the corresponding cells, under the assumption they are all-zero libraries.
+ * If `true`, size factors of zero are converted to the smallest non-zero size factor across all cells.
+ * If `false`, an error is raised instead.
+ * @param {boolean} [options.allowNonFinite=false] - Whether non-finite size factors should be allowed.
+ * If `true`, size factors of infinity are converted to the largest finite size factor in the dataset, and size factors of NaN are converted to 1.
  * If `false`, an error is raised instead.
  *
  * @return {ScranMatrix} A matrix of the same type as `x` containing log-transformed normalized expression values.
  */
-export function logNormCounts(x, { sizeFactors = null, center = true, block = null, allowZeros = false } = {}) {
+export function logNormCounts(x, { sizeFactors = null, center = true, block = null, allowZeros = false, allowNonFinite = false } = {}) {
     var sf_data;
     var block_data;
     var output;
@@ -57,7 +60,7 @@ export function logNormCounts(x, { sizeFactors = null, center = true, block = nu
         }
 
         output = gc.call(
-            module => module.log_norm_counts(x.matrix, use_sf, sfptr, use_blocks, bptr, center, allowZeros),
+            module => module.log_norm_counts(x.matrix, use_sf, sfptr, use_blocks, bptr, center, allowZeros, allowNonFinite),
             x.constructor
         );
 

diff --git a/js/quickAdtSizeFactors.js b/js/quickAdtSizeFactors.js
@@ -53,7 +53,7 @@ export function quickAdtSizeFactors(x, { numberOfClusters = 20, numberOfPCs = 25
         } else if (buffer.length !== x.numberOfColumns()) {
             throw new Error("length of 'buffer' should be equal to the number of columns in 'x'");
         }
-        groupedSizeFactors(x, clust.clusters({ copy: "view" }), { buffer: buffer, numberOfThreads: numberOfThreads });
+        groupedSizeFactors(x, clust.clusters({ copy: "view" }), { buffer: buffer, allowZeros: true, allowNonFinite: true, numberOfThreads: numberOfThreads });
 
     } catch (e) {
         utils.free(local_buffer);

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
     "name": "scran.js",
-    "version": "3.0.0-alpha.2",
+    "version": "3.0.0-alpha.3",
     "description": "Single cell RNA-seq analysis in Javascript",
     "license": "MIT",
     "main": "main/index.js",

diff --git a/src/grouped_size_factors.cpp b/src/grouped_size_factors.cpp
@@ -8,9 +8,22 @@
 
 #include <vector>
 
-void grouped_size_factors(const NumericMatrix& mat, uintptr_t groups, bool center, double prior_count, int reference, uintptr_t output, int nthreads) {
+void grouped_size_factors(const NumericMatrix& mat, 
+    uintptr_t groups, 
+    bool center, 
+    bool allow_zero,
+    bool allow_non_finite,
+    double prior_count, 
+    int reference, 
+    uintptr_t output, 
+    int nthreads) 
+{
     scran::GroupedSizeFactors runner;
-    runner.set_center(center).set_prior_count(prior_count).set_num_threads(nthreads);
+    runner.set_center(center);
+    runner.set_prior_count(prior_count);
+    runner.set_num_threads(nthreads);
+    runner.set_handle_zeros(allow_zero);
+    runner.set_handle_non_finite(allow_non_finite);
 
     auto gptr = reinterpret_cast<const int*>(groups);
     auto optr = reinterpret_cast<double*>(output);

diff --git a/src/log_norm_counts.cpp b/src/log_norm_counts.cpp
@@ -15,10 +15,12 @@ NumericMatrix log_norm_counts(const NumericMatrix& mat,
     bool use_blocks, 
     uintptr_t blocks,
     bool center,
-    bool allow_zero)
+    bool allow_zero,
+    bool allow_non_finite)
 {
     scran::LogNormCounts norm;
     norm.set_handle_zeros(allow_zero);
+    norm.set_handle_non_finite(allow_non_finite);
     norm.set_center(center);
 
     std::vector<double> sf;