Skip to content

Commit

Permalink
Emit warnings if the requested and observed number of neighbors are d…
Browse files Browse the repository at this point in the history
…ifferent. (#94)

This is a quality-of-life improvement so that the functions don't silently
ignore the user-specified number of neighbors in favor of the number in the
search results. Now, at least the user knows that there's a difference.
  • Loading branch information
LTLA authored Oct 15, 2024
1 parent 1928c18 commit e855f6b
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 46 deletions.
13 changes: 9 additions & 4 deletions js/buildSnnGraph.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ export class BuildSnnGraphResults {
}

/**
* Build a shared nearest graph.
* Build a shared nearest graph where each cell is a node.
* Edges are formed between cells that share one or more nearest neighbors, weighted by the number or rank of those shared neighbors.
*
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}.
*
* Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}.
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.scheme="rank"] - Weighting scheme for the edges between cells.
* This can be based on the top ranks of the shared neighbors (`"rank"`),
Expand All @@ -64,6 +66,9 @@ export function buildSnnGraph(x, options = {}) {
try {
let ref;
if (x instanceof FindNearestNeighborsResults) {
if (neighbors != x.numberOfNeighbors()) {
console.warn("number of neighbors in 'x' does not match 'neighbors'");
}
ref = x;
} else {
my_neighbors = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads });
Expand Down
9 changes: 8 additions & 1 deletion js/findNearestNeighbors.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ export function buildNeighborSearchIndex(x, options = {}) {
export class FindNearestNeighborsResults {
#id;
#results;

u
constructor(id, raw) {
this.#id = id;
this.#results = raw;
Expand All @@ -141,6 +141,13 @@ export class FindNearestNeighborsResults {
return this.#results.num_obs();
}

/**
* @return {number} Number of neighbors per cell, as reported by the underlying search results.
* This is read from the results themselves rather than from the value passed to the search,
* so it will be zero if the results do not contain any cells.
*/
numberOfNeighbors() {
return this.#results.num_neighbors();
}

// Internal use only, not documented.
get results() {
return this.#results;
Expand Down
57 changes: 28 additions & 29 deletions js/runTsne.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,49 +103,47 @@ export function perplexityToNeighbors(perplexity) {
}

/**
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}.
*
* Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}.
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.perplexity=30] - Perplexity to use when computing neighbor probabilities in the t-SNE.
* @param {boolean} [options.checkMismatch=true] - Whether to check for a mismatch between the perplexity and the number of searched neighbors.
* Only relevant if `x` is a {@linkplain FindNearestNeighborsResults} object.
* @param {?number} [options.neighbors=null] - Number of nearest neighbors to find.
* If `null`, defaults to the output of {@linkcode perplexityToNeighbors perplexityToNeighbors(perplexity)}.
* @param {?number} [options.numberOfThreads=null] - Number of threads to use.
* If `null`, defaults to {@linkcode maximumThreads}.
*
* @return {TsneStatus} Object containing the initial status of the t-SNE algorithm.
*/
export function initializeTsne(x, options = {}) {
const { perplexity = 30, checkMismatch = true, numberOfThreads = null, ...others } = options;
const { perplexity = 30, neighbors = null, numberOfThreads = null, ...others } = options;
utils.checkOtherOptions(others);

var my_neighbors;
var my_nnres;
var raw_coords;
var output;
let nthreads = utils.chooseNumberOfThreads(numberOfThreads);

const k = (neighbors == null ? perplexityToNeighbors(perplexity) : neighbors);

try {
let neighbors;
let nnres;

if (x instanceof BuildNeighborSearchIndexResults) {
let k = perplexityToNeighbors(perplexity);
my_neighbors = findNearestNeighbors(x, k, { numberOfThreads: nthreads });
neighbors = my_neighbors;

my_nnres = findNearestNeighbors(x, k, { numberOfThreads: nthreads });
nnres = my_nnres
} else {
if (checkMismatch) {
let k = perplexityToNeighbors(perplexity);
if (k * x.numberOfCells() != x.size()) {
throw new Error("number of neighbors in 'x' does not match '3 * perplexity'");
}
if (k != x.numberOfNeighbors()) {
console.warn("number of neighbors in 'x' does not match 'neighbors'");
}
neighbors = x;
nnres = x;
}

raw_coords = utils.createFloat64WasmArray(2 * neighbors.numberOfCells());
wasm.call(module => module.randomize_tsne_start(neighbors.numberOfCells(), raw_coords.offset, 42));
raw_coords = utils.createFloat64WasmArray(2 * nnres.numberOfCells());
wasm.call(module => module.randomize_tsne_start(nnres.numberOfCells(), raw_coords.offset, 42));
output = gc.call(
module => module.initialize_tsne(neighbors.results, perplexity, nthreads),
module => module.initialize_tsne(nnres.results, perplexity, nthreads),
TsneStatus,
raw_coords
);
Expand All @@ -156,7 +154,7 @@ export function initializeTsne(x, options = {}) {
throw e;

} finally {
utils.free(my_neighbors);
utils.free(my_nnres);
}

return output;
Expand All @@ -166,23 +164,24 @@ export function initializeTsne(x, options = {}) {
* Run the t-SNE algorithm to the specified number of iterations.
* This is a wrapper around {@linkcode initializeTsne} and {@linkcode TsneStatus#run run}.
*
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}.
*
* Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}.
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.perplexity=30] - Perplexity to use when computing neighbor probabilities in the t-SNE.
* @param {boolean} [options.checkMismatch=true] - Whether to check for a mismatch between the perplexity and the number of searched neighbors.
* Only relevant if `x` is a {@linkplain FindNearestNeighborsResults} object.
* @param {?number} [options.neighbors=null] - Number of nearest neighbors to find.
* If `null`, defaults to the output of {@linkcode perplexityToNeighbors perplexityToNeighbors(perplexity)}.
* @param {?number} [options.numberOfThreads=null] - Number of threads to use.
* If `null`, defaults to {@linkcode maximumThreads}.
* @param {number} [options.maxIterations=1000] - Maximum number of iterations to perform.
*
* @return {object} Object containing coordinates of the t-SNE embedding, see {@linkcode TsneStatus#extractCoordinates TsneStatus.extractCoordinates} for more details.
*/
export function runTsne(x, options = {}) {
const { perplexity = 30, checkMismatch = true, numberOfThreads = null, maxIterations = 1000, ...others } = options;
const { perplexity = 30, neighbors = null, numberOfThreads = null, maxIterations = 1000, ...others } = options;
utils.checkOtherOptions(others);
let tstat = initializeTsne(x, { perplexity, checkMismatch, numberOfThreads });
let tstat = initializeTsne(x, { perplexity, neighbors, numberOfThreads });
tstat.run({ maxIterations });
return tstat.extractCoordinates();
}
25 changes: 15 additions & 10 deletions js/runUmap.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,10 @@ export class UmapStatus {
}

/**
* @param {(BuildNeighborSearchIndexResults|FindNearestNeighborsResults)} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}).
*
* Alternatively, a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.neighbors=15] - Number of neighbors to use in the UMAP algorithm.
* Ignored if `x` is a {@linkplain FindNearestNeighborsResults} object.
Expand All @@ -117,7 +118,7 @@ export function initializeUmap(x, options = {}) {
const { neighbors = 15, epochs = 500, minDist = 0.01, numberOfThreads = null, ...others } = options;
utils.checkOtherOptions(others);

var my_neighbors;
var my_nnres;
var raw_coords;
var output;
let nthreads = utils.chooseNumberOfThreads(numberOfThreads);
Expand All @@ -126,9 +127,12 @@ export function initializeUmap(x, options = {}) {
let nnres;

if (x instanceof BuildNeighborSearchIndexResults) {
my_neighbors = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads });
nnres = my_neighbors;
my_nnres = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads });
nnres = my_nnres;
} else {
if (neighbors != x.numberOfNeighbors()) {
console.warn("number of neighbors in 'x' does not match 'neighbors'");
}
nnres = x;
}

Expand All @@ -145,7 +149,7 @@ export function initializeUmap(x, options = {}) {
throw e;

} finally {
utils.free(my_neighbors);
utils.free(my_nnres);
}

return output;
Expand All @@ -155,9 +159,10 @@ export function initializeUmap(x, options = {}) {
* Run the UMAP algorithm.
* This is a wrapper around {@linkcode initializeUmap} and {@linkcode UmapStatus#run run}.
*
* @param {(BuildNeighborSearchIndexResults|FindNearestNeighborsResults)} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}.
*
* Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}.
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.neighbors=15] - Number of neighbors to use in the UMAP algorithm.
* Ignored if `x` is a {@linkplain FindNearestNeighborsResults} object.
Expand Down
1 change: 1 addition & 0 deletions src/NeighborIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ EMSCRIPTEN_BINDINGS(build_neighbor_index) {
emscripten::class_<NeighborResults>("NeighborResults")
.constructor<size_t, uintptr_t, uintptr_t, uintptr_t>()
.function("num_obs", &NeighborResults::num_obs, emscripten::return_value_policy::take_ownership())
.function("num_neighbors", &NeighborResults::num_neighbors, emscripten::return_value_policy::take_ownership())
.function("size", &NeighborResults::size, emscripten::return_value_policy::take_ownership())
.function("serialize", &NeighborResults::serialize, emscripten::return_value_policy::take_ownership());
}
4 changes: 4 additions & 0 deletions src/NeighborIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ struct NeighborResults {
return neighbors.size();
}

// Number of neighbors stored for the first observation, or zero when there are no observations at all.
int32_t num_neighbors() const {
    if (neighbors.empty()) {
        return 0;
    }
    return neighbors.front().size();
}

void serialize(uintptr_t runs, uintptr_t indices, uintptr_t distances, int32_t truncate) const {
auto rptr = reinterpret_cast<int32_t*>(runs);
auto iptr = reinterpret_cast<int32_t*>(indices);
Expand Down
4 changes: 2 additions & 2 deletions tests/clusterGraph.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ test("clusterGraph works as expected", () => {

var k = 5;
var res = scran.findNearestNeighbors(index, k);
var graph = scran.buildSnnGraph(res);
var graph = scran.buildSnnGraph(res, { neighbors: k });
expect(graph instanceof scran.BuildSnnGraphResults).toBe(true);

var clusters = scran.clusterGraph(graph);
Expand Down Expand Up @@ -49,7 +49,7 @@ test("clusterGraph works with other clustering methods", () => {

var k = 5;
var res = scran.findNearestNeighbors(index, k);
var graph = scran.buildSnnGraph(res);
var graph = scran.buildSnnGraph(res, { neighbors: k });

var clusters = scran.clusterGraph(graph, { method: "walktrap" });
expect(clusters instanceof scran.ClusterWalktrapResults);
Expand Down
12 changes: 12 additions & 0 deletions tests/findNearestNeighbors.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ test("neighbor index building works with various inputs", () => {
var res1 = scran.findNearestNeighbors(index, k);
var res2 = scran.findNearestNeighbors(index2, k);

expect(res1.numberOfNeighbors()).toBe(k);
expect(res2.numberOfNeighbors()).toBe(k);
expect(res1.numberOfCells()).toBe(ncells);
expect(res2.numberOfCells()).toBe(ncells);
expect(res1.size()).toBe(ncells * k);
Expand All @@ -46,6 +48,16 @@ test("neighbor index building works with various inputs", () => {
res2.free();
});

// With zero cells there is nothing to search, so the results should be empty
// even though five neighbors were asked for.
test("neighbor search works with an empty input", () => {
    var ngenes = 1000;
    var buffer = scran.createFloat64WasmArray(0);
    var index = scran.buildNeighborSearchIndex(buffer, { numberOfDims: ngenes, numberOfCells: 0 });
    var res = scran.findNearestNeighbors(index, 5);

    expect(res.numberOfCells()).toBe(0);
    expect(res.numberOfNeighbors()).toBe(0);
    expect(res.size()).toBe(0);

    // Cleaning up, as in the other tests in this file.
    buffer.free();
    index.free();
    res.free();
});

test("neighbor search works with serialization", () => {
var ndim = 5;
var ncells = 100;
Expand Down
6 changes: 6 additions & 0 deletions tests/runTsne.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ test("runTsne works as expected", () => {
expect(compare.equalArrays(start.x, finished.x)).toBe(false);
expect(compare.equalArrays(start.y, finished.y)).toBe(false);

// We get the same results when starting from existing NN results.
let nnres2 = scran.findNearestNeighbors(index, scran.perplexityToNeighbors(30));
let finished2 = scran.runTsne(nnres2);
expect(finished2.x).toEqual(finished.x);
expect(finished2.y).toEqual(finished.y);

// Cleaning up.
index.free();
init.free();
Expand Down
6 changes: 6 additions & 0 deletions tests/runUmap.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ test("runUmap works as expected", () => {
expect(compare.equalArrays(start.x, finished.x)).toBe(false);
expect(compare.equalArrays(start.y, finished.y)).toBe(false);

// We get the same results when starting from existing NN results.
let nnres2 = scran.findNearestNeighbors(index, 15);
let finished2 = scran.runUmap(nnres2);
expect(finished2.x).toEqual(finished.x);
expect(finished2.y).toEqual(finished.y);

// Cleaning up.
index.free();
init.free();
Expand Down

0 comments on commit e855f6b

Please sign in to comment.