From e855f6bbdbd8e3578d907057be7e0d8b4003698b Mon Sep 17 00:00:00 2001 From: Aaron Lun Date: Mon, 14 Oct 2024 17:00:04 -0700 Subject: [PATCH] Emit warnings if the requested and observed number of neighbors are different. (#94) This is a quality-of-life improvement so that the functions don't silently ignore the user-specified number of neighbors in favor of the number in the search results. Now, at least the user knows that there's a difference. --- js/buildSnnGraph.js | 13 ++++--- js/findNearestNeighbors.js | 9 ++++- js/runTsne.js | 57 +++++++++++++++--------------- js/runUmap.js | 25 +++++++------ src/NeighborIndex.cpp | 1 + src/NeighborIndex.h | 4 +++ tests/clusterGraph.test.js | 4 +-- tests/findNearestNeighbors.test.js | 12 +++++++ tests/runTsne.test.js | 6 ++++ tests/runUmap.test.js | 6 ++++ 10 files changed, 91 insertions(+), 46 deletions(-) diff --git a/js/buildSnnGraph.js b/js/buildSnnGraph.js index 95416377..0f8d0403 100644 --- a/js/buildSnnGraph.js +++ b/js/buildSnnGraph.js @@ -35,11 +35,13 @@ export class BuildSnnGraphResults { } /** - * Build a shared nearest graph. + * Build a shared nearest neighbor graph where each cell is a node. + * Edges are formed between cells that share one or more nearest neighbors, weighted by the number or rank of those shared neighbors. * - * @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x - * Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}), - * or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}). + * @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}. + * + * Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}. + * The number of neighbors should be equal to `neighbors`, otherwise a warning is raised. * @param {object} [options={}] - Optional parameters. 
* @param {number} [options.scheme="rank"] - Weighting scheme for the edges between cells. * This can be based on the top ranks of the shared neighbors (`"rank"`), @@ -64,6 +66,9 @@ export function buildSnnGraph(x, options = {}) { try { let ref; if (x instanceof FindNearestNeighborsResults) { + if (neighbors != x.numberOfNeighbors()) { + console.warn("number of neighbors in 'x' does not match 'neighbors'"); + } ref = x; } else { my_neighbors = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads }); diff --git a/js/findNearestNeighbors.js b/js/findNearestNeighbors.js index f25c4da0..cfd8b369 100644 --- a/js/findNearestNeighbors.js +++ b/js/findNearestNeighbors.js @@ -114,7 +114,7 @@ export function buildNeighborSearchIndex(x, options = {}) { export class FindNearestNeighborsResults { #id; #results; - + constructor(id, raw) { this.#id = id; this.#results = raw; @@ -141,6 +141,13 @@ export class FindNearestNeighborsResults { return this.#results.num_obs(); } + /** + * @return {number} Number of neighbors per cell in the search results, taken from the first cell; 0 if there are no cells. + */ + numberOfNeighbors() { + return this.#results.num_neighbors(); + } + // Internal use only, not documented. get results() { return this.#results; diff --git a/js/runTsne.js b/js/runTsne.js index 6275bc70..f5759d5d 100644 --- a/js/runTsne.js +++ b/js/runTsne.js @@ -103,49 +103,47 @@ export function perplexityToNeighbors(perplexity) { } /** - * @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x - * Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}), - * or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}). + * @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}. + * + * Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}. 
+ * The number of neighbors should be equal to `neighbors`, otherwise a warning is raised. * @param {object} [options={}] - Optional parameters. * @param {number} [options.perplexity=30] - Perplexity to use when computing neighbor probabilities in the t-SNE. - * @param {boolean} [options.checkMismatch=true] - Whether to check for a mismatch between the perplexity and the number of searched neighbors. - * Only relevant if `x` is a {@linkplain FindNearestNeighborsResults} object. + * @param {?number} [options.neighbors=null] - Number of nearest neighbors to find. + * If `null`, defaults to the output of {@linkcode perplexityToNeighbors perplexityToNeighbors(perplexity)}. * @param {?number} [options.numberOfThreads=null] - Number of threads to use. * If `null`, defaults to {@linkcode maximumThreads}. * * @return {TsneStatus} Object containing the initial status of the t-SNE algorithm. */ export function initializeTsne(x, options = {}) { - const { perplexity = 30, checkMismatch = true, numberOfThreads = null, ...others } = options; + const { perplexity = 30, neighbors = null, numberOfThreads = null, ...others } = options; utils.checkOtherOptions(others); - var my_neighbors; + var my_nnres; var raw_coords; var output; let nthreads = utils.chooseNumberOfThreads(numberOfThreads); + const k = (neighbors == null ? 
perplexityToNeighbors(perplexity) : neighbors); + try { - let neighbors; + let nnres; if (x instanceof BuildNeighborSearchIndexResults) { - let k = perplexityToNeighbors(perplexity); - my_neighbors = findNearestNeighbors(x, k, { numberOfThreads: nthreads }); - neighbors = my_neighbors; - + my_nnres = findNearestNeighbors(x, k, { numberOfThreads: nthreads }); + nnres = my_nnres } else { - if (checkMismatch) { - let k = perplexityToNeighbors(perplexity); - if (k * x.numberOfCells() != x.size()) { - throw new Error("number of neighbors in 'x' does not match '3 * perplexity'"); - } + if (k != x.numberOfNeighbors()) { + console.warn("number of neighbors in 'x' does not match 'neighbors'"); } - neighbors = x; + nnres = x; } - raw_coords = utils.createFloat64WasmArray(2 * neighbors.numberOfCells()); - wasm.call(module => module.randomize_tsne_start(neighbors.numberOfCells(), raw_coords.offset, 42)); + raw_coords = utils.createFloat64WasmArray(2 * nnres.numberOfCells()); + wasm.call(module => module.randomize_tsne_start(nnres.numberOfCells(), raw_coords.offset, 42)); output = gc.call( - module => module.initialize_tsne(neighbors.results, perplexity, nthreads), + module => module.initialize_tsne(nnres.results, perplexity, nthreads), TsneStatus, raw_coords ); @@ -156,7 +154,7 @@ export function initializeTsne(x, options = {}) { throw e; } finally { - utils.free(my_neighbors); + utils.free(my_nnres); } return output; @@ -166,13 +164,14 @@ export function initializeTsne(x, options = {}) { * Run the t-SNE algorithm to the specified number of iterations. * This is a wrapper around {@linkcode initializeTsne} and {@linkcode TsneStatus#run run}. * - * @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x - * Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}), - * or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}). 
+ * @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}. + * + * Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}. + * The number of neighbors should be equal to `neighbors`, otherwise a warning is raised. * @param {object} [options={}] - Optional parameters. * @param {number} [options.perplexity=30] - Perplexity to use when computing neighbor probabilities in the t-SNE. - * @param {boolean} [options.checkMismatch=true] - Whether to check for a mismatch between the perplexity and the number of searched neighbors. - * Only relevant if `x` is a {@linkplain FindNearestNeighborsResults} object. + * @param {?number} [options.neighbors=null] - Number of nearest neighbors to find. + * If `null`, defaults to the output of {@linkcode perplexityToNeighbors perplexityToNeighbors(perplexity)}. * @param {?number} [options.numberOfThreads=null] - Number of threads to use. * If `null`, defaults to {@linkcode maximumThreads}. * @param {number} [options.maxIterations=1000] - Maximum number of iterations to perform. @@ -180,9 +179,9 @@ export function initializeTsne(x, options = {}) { * @return {object} Object containing coordinates of the t-SNE embedding, see {@linkcode TsneStatus#extractCoordinates TsneStatus.extractCoordinates} for more details. 
*/ export function runTsne(x, options = {}) { - const { perplexity = 30, checkMismatch = true, numberOfThreads = null, maxIterations = 1000, ...others } = options; + const { perplexity = 30, neighbors = null, numberOfThreads = null, maxIterations = 1000, ...others } = options; utils.checkOtherOptions(others); - let tstat = initializeTsne(x, { perplexity, checkMismatch, numberOfThreads }); + let tstat = initializeTsne(x, { perplexity, neighbors, numberOfThreads }); tstat.run({ maxIterations }); return tstat.extractCoordinates(); } diff --git a/js/runUmap.js b/js/runUmap.js index 9666027a..623c3c92 100644 --- a/js/runUmap.js +++ b/js/runUmap.js @@ -100,9 +100,10 @@ export class UmapStatus { } /** - * @param {(BuildNeighborSearchIndexResults|FindNearestNeighborsResults)} x - * Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}), - * or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}). + * @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}). + * + * Alternatively, a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}). + * The number of neighbors should be equal to `neighbors`, otherwise a warning is raised. * @param {object} [options={}] - Optional parameters. * @param {number} [options.neighbors=15] - Number of neighbors to use in the UMAP algorithm. * Ignored if `x` is a {@linkplain FindNearestNeighborsResults} object. 
@@ -117,7 +118,7 @@ export function initializeUmap(x, options = {}) { const { neighbors = 15, epochs = 500, minDist = 0.01, numberOfThreads = null, ...others } = options; utils.checkOtherOptions(others); - var my_neighbors; + var my_nnres; var raw_coords; var output; let nthreads = utils.chooseNumberOfThreads(numberOfThreads); @@ -126,9 +127,12 @@ export function initializeUmap(x, options = {}) { let nnres; if (x instanceof BuildNeighborSearchIndexResults) { - my_neighbors = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads }); - nnres = my_neighbors; + my_nnres = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads }); + nnres = my_nnres; } else { + if (neighbors != x.numberOfNeighbors()) { + console.warn("number of neighbors in 'x' does not match 'neighbors'"); + } nnres = x; } @@ -145,7 +149,7 @@ export function initializeUmap(x, options = {}) { throw e; } finally { - utils.free(my_neighbors); + utils.free(my_nnres); } return output; @@ -155,9 +159,10 @@ export function initializeUmap(x, options = {}) { * Run the UMAP algorithm. * This is a wrapper around {@linkcode initializeUmap} and {@linkcode UmapStatus#run run}. * - * @param {(BuildNeighborSearchIndexResults|FindNearestNeighborsResults)} x - * Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}), - * or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}). + * @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}. + * + * Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}. + * The number of neighbors should be equal to `neighbors`, otherwise a warning is raised. * @param {object} [options={}] - Optional parameters. * @param {number} [options.neighbors=15] - Number of neighbors to use in the UMAP algorithm. 
* Ignored if `x` is a {@linkplain FindNearestNeighborsResults} object. diff --git a/src/NeighborIndex.cpp b/src/NeighborIndex.cpp index 7078309f..4bf3d3bf 100644 --- a/src/NeighborIndex.cpp +++ b/src/NeighborIndex.cpp @@ -60,6 +60,7 @@ EMSCRIPTEN_BINDINGS(build_neighbor_index) { emscripten::class_("NeighborResults") .constructor() .function("num_obs", &NeighborResults::num_obs, emscripten::return_value_policy::take_ownership()) + .function("num_neighbors", &NeighborResults::num_neighbors, emscripten::return_value_policy::take_ownership()) .function("size", &NeighborResults::size, emscripten::return_value_policy::take_ownership()) .function("serialize", &NeighborResults::serialize, emscripten::return_value_policy::take_ownership()); } diff --git a/src/NeighborIndex.h b/src/NeighborIndex.h index 75becb73..590400ca 100644 --- a/src/NeighborIndex.h +++ b/src/NeighborIndex.h @@ -56,6 +56,10 @@ struct NeighborResults { return neighbors.size(); } + int32_t num_neighbors() const { + return (neighbors.empty() ? 
0 : neighbors.front().size()); + } + void serialize(uintptr_t runs, uintptr_t indices, uintptr_t distances, int32_t truncate) const { auto rptr = reinterpret_cast(runs); auto iptr = reinterpret_cast(indices); diff --git a/tests/clusterGraph.test.js b/tests/clusterGraph.test.js index 546dc8bb..6dc75afe 100644 --- a/tests/clusterGraph.test.js +++ b/tests/clusterGraph.test.js @@ -12,7 +12,7 @@ test("clusterGraph works as expected", () => { var k = 5; var res = scran.findNearestNeighbors(index, k); - var graph = scran.buildSnnGraph(res); + var graph = scran.buildSnnGraph(res, { neighbors: k }); expect(graph instanceof scran.BuildSnnGraphResults).toBe(true); var clusters = scran.clusterGraph(graph); @@ -49,7 +49,7 @@ test("clusterGraph works with other clustering methods", () => { var k = 5; var res = scran.findNearestNeighbors(index, k); - var graph = scran.buildSnnGraph(res); + var graph = scran.buildSnnGraph(res, { neighbors: k }); var clusters = scran.clusterGraph(graph, { method: "walktrap" }); expect(clusters instanceof scran.ClusterWalktrapResults); diff --git a/tests/findNearestNeighbors.test.js b/tests/findNearestNeighbors.test.js index c2114907..f88303d3 100644 --- a/tests/findNearestNeighbors.test.js +++ b/tests/findNearestNeighbors.test.js @@ -26,6 +26,8 @@ test("neighbor index building works with various inputs", () => { var res1 = scran.findNearestNeighbors(index, k); var res2 = scran.findNearestNeighbors(index2, k); + expect(res1.numberOfNeighbors()).toBe(k); + expect(res2.numberOfNeighbors()).toBe(k); expect(res1.numberOfCells()).toBe(ncells); expect(res2.numberOfCells()).toBe(ncells); expect(res1.size()).toBe(ncells * k); @@ -46,6 +48,16 @@ test("neighbor index building works with various inputs", () => { res2.free(); }); +test("neighbor search works with an empty input", () => { + var ngenes = 1000; + var buffer = scran.createFloat64WasmArray(0); + var index = scran.buildNeighborSearchIndex(buffer, { numberOfDims: ngenes, numberOfCells: 0 }); + var res 
= scran.findNearestNeighbors(index, 5); + expect(res.numberOfCells()).toBe(0); + expect(res.numberOfNeighbors()).toBe(0); + expect(res.size()).toBe(0); +}) + test("neighbor search works with serialization", () => { var ndim = 5; var ncells = 100; diff --git a/tests/runTsne.test.js b/tests/runTsne.test.js index ab6308c4..2eab789d 100644 --- a/tests/runTsne.test.js +++ b/tests/runTsne.test.js @@ -24,6 +24,12 @@ test("runTsne works as expected", () => { expect(compare.equalArrays(start.x, finished.x)).toBe(false); expect(compare.equalArrays(start.y, finished.y)).toBe(false); + // We get the same results when starting from existing NN results. + let nnres2 = scran.findNearestNeighbors(index, scran.perplexityToNeighbors(30)); + let finished2 = scran.runTsne(nnres2); + expect(finished2.x).toEqual(finished.x); + expect(finished2.y).toEqual(finished.y); + // Cleaning up. index.free(); init.free(); diff --git a/tests/runUmap.test.js b/tests/runUmap.test.js index f0f457f8..088e2599 100644 --- a/tests/runUmap.test.js +++ b/tests/runUmap.test.js @@ -25,6 +25,12 @@ test("runUmap works as expected", () => { expect(compare.equalArrays(start.x, finished.x)).toBe(false); expect(compare.equalArrays(start.y, finished.y)).toBe(false); + // We get the same results when starting from existing NN results. + let nnres2 = scran.findNearestNeighbors(index, 15); + let finished2 = scran.runUmap(nnres2); + expect(finished2.x).toEqual(finished.x); + expect(finished2.y).toEqual(finished.y); + // Cleaning up. index.free(); init.free();