Skip to content

Commit

Permalink
ID mapping for sparse 2D arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Jun 22, 2023
1 parent 34643bb commit 61ca593
Showing 1 changed file with 30 additions and 11 deletions.
41 changes: 30 additions & 11 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,8 +879,7 @@ def _create_from_matrix(
)

if isinstance(soma_ndarray, DenseNDArray):
# XXX JIDMAP -- non-appendable -- ?
# XXX FAIL EARLY
# XXX JIDMAP -- non-appendable -- fail early
_write_matrix_to_denseNDArray(
soma_ndarray,
matrix,
Expand All @@ -899,7 +898,8 @@ def _create_from_matrix(
),
context=context,
ingestion_params=ingestion_params,
# XXX JIDMAP
axis_0_mapping=axis_0_mapping,
axis_1_mapping=axis_1_mapping,
)
else:
raise TypeError(f"unknown array type {type(soma_ndarray)}")
Expand Down Expand Up @@ -1168,17 +1168,32 @@ def _write_matrix_to_sparseNDArray(
tiledb_create_options: TileDBCreateOptions,
context: Optional[SOMATileDBContext],
ingestion_params: IngestionParams,
axis_0_mapping: AxisIDMapping,
axis_1_mapping: AxisIDMapping,
) -> None:
"""Write a matrix to an empty SparseNDArray"""

# XXX JIDMAP
def _coo_to_table(mat_coo: sp.coo_matrix, axis: int = 0, base: int = 0) -> pa.Table:
def _coo_to_table(
mat_coo: sp.coo_matrix,
axis_0_mapping: AxisIDMapping,
axis_1_mapping: AxisIDMapping,
axis: int = 0,
base: int = 0,
) -> pa.Table:

soma_dim_0 = mat_coo.row + base if base > 0 and axis == 0 else mat_coo.row
soma_dim_1 = mat_coo.col + base if base > 0 and axis == 1 else mat_coo.col

# Remap each row/column index through its axis ID mapping (axis_N_mapping.data[index])
soma_dim_0 = [axis_0_mapping.data[e] for e in soma_dim_0]
soma_dim_1 = [axis_1_mapping.data[e] for e in soma_dim_1]

pydict = {
"soma_data": mat_coo.data,
"soma_dim_0": mat_coo.row + base if base > 0 and axis == 0 else mat_coo.row,
"soma_dim_1": mat_coo.col + base if base > 0 and axis == 1 else mat_coo.col,
"soma_dim_0": soma_dim_0,
"soma_dim_1": soma_dim_1,
}
# XXX RUN THAT THROUGH INT-TO-INT REMAP

return pa.Table.from_pydict(pydict)

# There is a chunk-by-chunk already-done check for resume mode, below.
Expand Down Expand Up @@ -1209,7 +1224,9 @@ def _coo_to_table(mat_coo: sp.coo_matrix, axis: int = 0, base: int = 0) -> pa.Ta

# Write all at once?
if not tiledb_create_options.write_X_chunked:
soma_ndarray.write(_coo_to_table(sp.coo_matrix(matrix)))
soma_ndarray.write(
_coo_to_table(sp.coo_matrix(matrix), axis_0_mapping, axis_1_mapping)
)
return

# Or, write in chunks, striding across the most efficient slice axis
Expand All @@ -1228,7 +1245,7 @@ def _coo_to_table(mat_coo: sp.coo_matrix, axis: int = 0, base: int = 0) -> pa.Ta
goal_chunk_nnz = tiledb_create_options.goal_chunk_nnz

coords = [slice(None), slice(None)]
i = 0 # XXX
i = 0
while i < dim_max_size:
t1 = time.time()

Expand Down Expand Up @@ -1280,7 +1297,9 @@ def _coo_to_table(mat_coo: sp.coo_matrix, axis: int = 0, base: int = 0) -> pa.Ta
% (i, i2 - 1, dim_max_size, chunk_percent, chunk_coo.nnz),
)

soma_ndarray.write(_coo_to_table(chunk_coo, stride_axis, i))
soma_ndarray.write(
_coo_to_table(chunk_coo, axis_0_mapping, axis_1_mapping, stride_axis, i)
)

t2 = time.time()
chunk_seconds = t2 - t1
Expand Down

0 comments on commit 61ca593

Please sign in to comment.