Skip to content

Commit

Permalink
Attempt to get the matrix initialization code running.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Jul 19, 2023
1 parent a81437b commit a4f5ac8
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 237 deletions.
29 changes: 0 additions & 29 deletions js/ScranMatrix.js
Original file line number Diff line number Diff line change
Expand Up @@ -146,33 +146,4 @@ export class ScranMatrix {
isSparse() {
return this.#matrix.sparse();
}

// Internal use only, by initialize* functions.
isReorganized() {
return this.#matrix.reorganized();
}

// Internal use only, by initialize* functions.
identities({ buffer = null } = {}) {
if (buffer != null) {
this.#matrix.identities(buffer.offset);
return buffer.array();
} else {
var output;
buffer = utils.createInt32WasmArray(this.#matrix.nrow());
try {
this.#matrix.identities(buffer.offset);
output = buffer.slice();
} finally {
buffer.free();
}
return output;
}
}

// Internal use only, by initialize* functions.
wipeIdentities() {
this.#matrix.wipe_identities();
return;
}
}
109 changes: 22 additions & 87 deletions js/initializeSparseMatrix.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,15 @@ import { ScranMatrix } from "./ScranMatrix.js";
* This is generally expected to contain non-negative integers; otherwise, users should set `forceInteger = false`.
* @param {object} [options={}] - Optional parameters.
* @param {boolean} [options.forceInteger=true] - Whether to coerce `values` to integers via truncation.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, which reorders the rows of the loaded matrix for better memory efficiency.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details.
* Only used if `values` contains an integer type and/or `forceInteger = true`.
* Setting to `true` assumes that `values` contains only non-negative integers.
*
* @return {object} An object containing:
* - `matrix`, a {@linkplain ScranMatrix} containing the sparse matrix data.
* If layering is enabled, rows are shuffled to enable use of smaller integer types for low-abundance features.
* - `row_ids`, an Int32Array specifying the identity of each row in `matrix`.
* This can be interpreted as the row slicing that was applied to the original matrix to obtain `matrix`.
* If layering is not enabled, this is `null`.
*
* Layering is enabled if the matrix contains integer data (either directly or via `forceInteger = true`) and `layered = true`.
* @return {ScranMatrix} Matrix containing sparse data.
*/
export function initializeSparseMatrixFromDenseArray(numberOfRows, numberOfColumns, values, { forceInteger = true, layered = true } = {}) {
var val_data;
var output;
var ids = null;

try {
val_data = utils.wasmifyArray(values, null);
Expand All @@ -48,11 +40,6 @@ export function initializeSparseMatrixFromDenseArray(numberOfRows, numberOfColum
ScranMatrix
);

if (output.isReorganized()) {
ids = output.identities();
output.wipeIdentities();
}

} catch (e) {
utils.free(output);
throw e;
Expand All @@ -61,7 +48,7 @@ export function initializeSparseMatrixFromDenseArray(numberOfRows, numberOfColum
utils.free(val_data);
}

return { "matrix": output, "row_ids": ids };
return output;
}

/**
Expand All @@ -76,28 +63,20 @@ export function initializeSparseMatrixFromDenseArray(numberOfRows, numberOfColum
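* @param {WasmArray} pointers Pointers specifying the start of each row (if `byRow = true`) or each column (if `byRow = false`) in `indices`.
* This should have length equal to `numberOfRows + 1` if `byRow = true`, and `numberOfColumns + 1` otherwise.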
* @param {object} [options={}] - Optional parameters.
* @param {boolean} [options.byColumn=true] - Whether the input arrays are supplied in the compressed sparse column format.
* @param {boolean} [options.byRow=true] - Whether the input arrays are supplied in the compressed sparse row format.
* If `true`, `indices` should contain column indices and `pointers` should specify the start of each row in `indices`.
* @param {boolean} [options.forceInteger=true] - Whether to coerce `values` to integers via truncation.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, which reorders the rows of the loaded matrix for better memory efficiency.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details.
* Only used if `values` contains an integer type and/or `forceInteger = true`.
* Setting to `true` assumes that `values` contains only non-negative integers.
*
* @return {object} An object containing:
* - `matrix`, a {@linkplain ScranMatrix} containing the sparse matrix data.
* If layering is enabled, rows are shuffled to enable use of smaller integer types for low-abundance features.
* - `row_ids`, an Int32Array specifying the identity of each row in `matrix`.
* This can be interpreted as the row slicing that was applied to the original matrix to obtain `matrix`.
* If layering is not enabled, this is `null`.
*
* Layering is enabled if the matrix contains integer data (either directly or via `forceInteger = true`) and `layered = true`.
* @return {ScranMatrix} Matrix containing sparse data.
*/
export function initializeSparseMatrixFromCompressedVectors(numberOfRows, numberOfColumns, values, indices, pointers, { byColumn = true, forceInteger = true, layered = true } = {}) {
export function initializeSparseMatrixFromCompressedVectors(numberOfRows, numberOfColumns, values, indices, pointers, { byRow = true, forceInteger = true, layered = true } = {}) {
var val_data;
var ind_data;
var indp_data;
var output;
var ids = null;

try {
val_data = utils.wasmifyArray(values, null);
Expand All @@ -106,7 +85,7 @@ export function initializeSparseMatrixFromCompressedVectors(numberOfRows, number
if (val_data.length != ind_data.length) {
throw new Error("'values' and 'indices' should have the same length");
}
if (indp_data.length != (byColumn ? numberOfColumns : numberOfRows) + 1) {
if (indp_data.length != (byRow ? numberOfRows : numberOfColumns) + 1) {
throw new Error("'pointers' does not have an appropriate length");
}

Expand All @@ -121,18 +100,13 @@ export function initializeSparseMatrixFromCompressedVectors(numberOfRows, number
ind_data.constructor.className.replace("Wasm", ""),
indp_data.offset,
indp_data.constructor.className.replace("Wasm", ""),
byColumn,
byRow,
forceInteger,
layered
),
ScranMatrix
);

if (output.isReorganized()) {
ids = output.identities();
output.wipeIdentities();
}

} catch (e) {
utils.free(output);
throw e;
Expand All @@ -143,7 +117,7 @@ export function initializeSparseMatrixFromCompressedVectors(numberOfRows, number
utils.free(indp_data);
}

return { "matrix": output, "row_ids": ids };
return output;
}

/**
Expand All @@ -157,19 +131,13 @@ export function initializeSparseMatrixFromCompressedVectors(numberOfRows, number
* @param {object} [options={}] - Optional parameters.
* @param {?boolean} [options.compressed=null] - Whether the buffer is Gzip-compressed.
* If `null`, we detect this automatically from the magic number in the header.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, which reorders the rows of the loaded matrix for better memory efficiency.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details.
*
* @return {object} An object containing:
* - `matrix`, a {@linkplain ScranMatrix} containing the sparse matrix data.
* If `layered = true`, rows are shuffled to enable use of smaller integer types for low-abundance features.
* - `row_ids`, an Int32Array specifying the identity of each row in `matrix`.
* This can be interpreted as the row slicing that was applied to the original matrix to obtain `matrix`.
* If `layered = false`, this is `null`.
* @return {ScranMatrix} Matrix containing sparse data.
*/
export function initializeSparseMatrixFromMatrixMarket(x, { compressed = null, layered = true } = {}) {
var buf_data;
var output;
var ids = null;

try {
compressed = convert_compressed(compressed);
Expand All @@ -186,11 +154,6 @@ export function initializeSparseMatrixFromMatrixMarket(x, { compressed = null, l
);
}

if (output.isReorganized()) {
ids = output.identities();
output.wipeIdentities();
}

} catch(e) {
utils.free(output);
throw e;
Expand All @@ -199,7 +162,7 @@ export function initializeSparseMatrixFromMatrixMarket(x, { compressed = null, l
utils.free(buf_data);
}

return { "matrix": output, "row_ids": ids };
return output;
}

function convert_compressed(compressed) {
Expand Down Expand Up @@ -263,27 +226,17 @@ export function extractMatrixMarketDimensions(x, { compressed = null } = {}) {
* For the latter, both H5AD and 10X-style sparse formats are supported.
* @param {object} [options={}] - Optional parameters.
* @param {boolean} [options.forceInteger=true] - Whether to coerce all elements to integers via truncation.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, which reorders the rows of the loaded matrix for better memory efficiency.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details.
* Only used if the relevant HDF5 dataset contains an integer type and/or `forceInteger = true`.
* Setting to `true` assumes that the matrix contains only non-negative integers.
* @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetRow=null] - Row indices to extract.
* All indices must be non-negative integers less than the number of rows in the sparse matrix.
* @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetColumn=null] - Column indices to extract.
* All indices must be non-negative integers less than the number of columns in the sparse matrix.
* @param {number} [options.cacheSize=100000000] - Size of the cache for loading chunks from HDF5 files.
* Only really relevant when reading dense matrices, where a larger cache size may be necessary for handling large chunk dimensions efficiently.
*
* @return {object} An object containing:
* - `matrix`, a {@linkplain ScranMatrix} containing the sparse matrix data.
* If layering is enabled, rows are shuffled to enable use of smaller integer types for low-abundance features.
* - `row_ids`, an Int32Array specifying the identity of each row in `matrix`.
* This can be interpreted as the row slicing that was applied to the original matrix to obtain `matrix`.
* If layering is not enabled, this is `null`.
* If `subsetRow` was provided, `row_ids` contains indices into `subsetRow`, i.e., the i-th row in `matrix` is the `subsetRow[row_ids[i]]` row in the original matrix.
*
* Layering is enabled if the matrix contains integer data (either directly or via `forceInteger = true`) and `layered = true`.
* @return {ScranMatrix} Matrix containing sparse data.
*/
export function initializeSparseMatrixFromHDF5(file, name, { forceInteger = true, layered = true, subsetRow = null, subsetColumn = null, cacheSize = 100000000 } = {}) {
export function initializeSparseMatrixFromHDF5(file, name, { forceInteger = true, layered = true, subsetRow = null, subsetColumn = null } = {}) {
var ids = null;
var output;
let wasm_row, wasm_col;
Expand All @@ -306,15 +259,10 @@ export function initializeSparseMatrixFromHDF5(file, name, { forceInteger = true
}

output = gc.call(
module => module.read_hdf5_matrix(file, name, forceInteger, layered, use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length, cacheSize),
module => module.read_hdf5_matrix(file, name, forceInteger, layered, use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length),
ScranMatrix
);

if (output.isReorganized()) {
ids = output.identities();
output.wipeIdentities();
}

} catch(e) {
utils.free(output);
throw e;
Expand All @@ -323,7 +271,7 @@ export function initializeSparseMatrixFromHDF5(file, name, { forceInteger = true
utils.free(wasm_col);
}

return { "matrix": output, "row_ids": ids };
return output;
}

/**
Expand Down Expand Up @@ -404,26 +352,19 @@ export function initializeDenseMatrixFromDenseArray(numberOfRows, numberOfColumn
}

/**
* Initialize a layered sparse matrix from an RDS file.
* Initialize a sparse matrix from an RDS file.
*
* @param {RdsObject} x - Handle to an object inside an RDS file.
* This should be an integer/numeric matrix, `dgCMatrix` or `dgTMatrix` object.
* @param {object} [options={}] - Optional parameters.
* @param {boolean} [options.consume=false] - Whether to consume the values in `x` when creating the output sparse matrix.
* Setting this to `true` improves memory efficiency at the cost of preventing any further use of `x`.
* @param {boolean} [options.forceInteger=true] - Whether to coerce all elements to integers via truncation.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, which reorders the rows of the loaded matrix for better memory efficiency.
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details.
* Only used if the R matrix is of an integer type and/or `forceInteger = true`.
* Setting to `true` assumes that the matrix contains only non-negative integers.
*
* @return {object} An object containing:
* - `matrix`, a {@linkplain ScranMatrix} containing the sparse matrix data.
* If layering is enabled, rows are shuffled to enable use of smaller integer types for low-abundance features.
* - `row_ids`, an Int32Array specifying the identity of each row in `matrix`.
* This can be interpreted as the row slicing that was applied to the original matrix to obtain `matrix`.
* If layering is not enabled, this is `null`.
*
* Layering is enabled if the matrix contains integer data (either directly or via `forceInteger = true`) and `layered = true`.
* @return {ScranMatrix} Sparse matrix.
*/
export function initializeSparseMatrixFromRds(x, { consume = false, forceInteger = true, layered = true } = {}) {
var ids = null;
Expand All @@ -434,16 +375,10 @@ export function initializeSparseMatrixFromRds(x, { consume = false, forceInteger
module => module.initialize_sparse_matrix_from_rds(x.object.$$.ptr, forceInteger, layered, consume),
ScranMatrix
);

if (output.isReorganized()) {
ids = output.identities();
output.wipeIdentities();
}

} catch(e) {
utils.free(output);
throw e;
}

return { "matrix": output, "row_ids": ids };
return output;
}
20 changes: 10 additions & 10 deletions src/initialize_sparse_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,23 @@ NumericMatrix initialize_sparse_matrix_internal(size_t nrows, size_t ncols, size
uintptr_t values, const std::string& value_type,
uintptr_t indices, const std::string& index_type,
uintptr_t indptrs, const std::string& indptr_type,
bool csc, bool layered)
bool by_row, bool layered)
{
auto val = create_SomeNumericArray<T>(values, nelements, value_type);
auto idx = create_SomeNumericArray<int>(indices, nelements, index_type);

if (csc && !layered) {
// Directly creating a CSC matrix.
if (by_row && !layered) {
// Directly creating a CSR matrix.
auto ind = create_SomeNumericArray<size_t>(indptrs, ncols + 1, indptr_type);
return copy_into_sparse<T>(nrows, ncols, val, idx, ind);
} else {
std::shared_ptr<tatami::Matrix<T, int> > mat;
if (csc) {
auto ind = create_SomeNumericArray<size_t>(indptrs, ncols + 1, indptr_type);
mat.reset(new tatami::CompressedSparseColumnMatrix<T, int, decltype(val), decltype(idx), decltype(ind)>(nrows, ncols, val, idx, ind));
} else {
if (by_row) {
auto ind = create_SomeNumericArray<size_t>(indptrs, nrows + 1, indptr_type);
mat.reset(new tatami::CompressedSparseRowMatrix<T, int, decltype(val), decltype(idx), decltype(ind)>(nrows, ncols, val, idx, ind));
} else {
auto ind = create_SomeNumericArray<size_t>(indptrs, ncols + 1, indptr_type);
mat.reset(new tatami::CompressedSparseColumnMatrix<T, int, decltype(val), decltype(idx), decltype(ind)>(nrows, ncols, val, idx, ind));
}
return sparse_from_tatami(mat.get(), layered);
}
Expand All @@ -96,12 +96,12 @@ NumericMatrix initialize_sparse_matrix(size_t nrows, size_t ncols, size_t neleme
uintptr_t values, std::string value_type,
uintptr_t indices, std::string index_type,
uintptr_t indptrs, std::string indptr_type,
bool csc, bool force_integer, bool layered)
bool by_row, bool force_integer, bool layered)
{
if (force_integer || is_type_integer(value_type)) {
return initialize_sparse_matrix_internal<int>(nrows, ncols, nelements, values, value_type, indices, index_type, indptrs, indptr_type, csc, layered);
return initialize_sparse_matrix_internal<int>(nrows, ncols, nelements, values, value_type, indices, index_type, indptrs, indptr_type, by_row, layered);
} else {
return initialize_sparse_matrix_internal<double>(nrows, ncols, nelements, values, value_type, indices, index_type, indptrs, indptr_type, csc, false);
return initialize_sparse_matrix_internal<double>(nrows, ncols, nelements, values, value_type, indices, index_type, indptrs, indptr_type, by_row, false);
}
}

Expand Down
Loading

0 comments on commit a4f5ac8

Please sign in to comment.