JuliaSparse · alyst · Sep 10, 2024 · Sep 10, 2024 · Sep 9, 2024 · Sep 10, 2024
diff --git a/README.md b/README.md
@@ -2,8 +2,9 @@
 
 [![codecov](https://codecov.io/gh/JuliaSparse/MKLSparse.jl/graph/badge.svg?token=j3KoKBEIt1)](https://codecov.io/gh/JuliaSparse/MKLSparse.jl)
 
-`MKLSparse.jl` is a Julia package to seamlessly use the sparse functionality in MKL to speed up operations on sparse arrays in Julia.
-In order to use `MKLSparse.jl` you do not need to install Intel's MKL library nor build Julia with MKL. `MKLSparse.jl` will automatically download and use the MKL library for you when installed.
+*MKLSparse.jl* is a Julia package to seamlessly use the [sparse BLAS routines from Intel's Math Kernel Library (MKL)](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2024-2/blas-and-sparse-blas-routines.html)
+to speed up operations on sparse arrays in Julia.
+In order to use *MKLSparse.jl* you do not need to install Intel's MKL library nor build Julia with MKL. *MKLSparse.jl* will automatically download and use the MKL library for you when installed.
 
 ### Matrix multiplications
 

diff --git a/src/generic.jl b/src/generic.jl
@@ -56,6 +56,203 @@ function mm!(transA::Char, alpha::T, A::AbstractSparseMatrix{T}, descr::matrix_d
     return C
 end
 
+# C := op(A) * B, where C is sparse
+function spmm(transA::Char, A::AbstractSparseMatrix{T}, B::AbstractSparseMatrix{T}) where T
+    check_trans(transA)
+    check_mat_op_sizes(nothing, A, transA, B, 'N')
+    Cout = Ref{sparse_matrix_t}()
+    hA = MKLSparseMatrix(A)
+    hB = MKLSparseMatrix(B)
+    res = mkl_call(Val{:mkl_sparse_spmmI}(), typeof(A),
+                   transA, hA, hB, Cout)
+    destroy(hA)
+    destroy(hB)
+    check_status(res)
+    return MKLSparseMatrix(Cout[])
+end
+
+# C := op(A) * B, where C is dense
+function spmmd!(transa::Char, A::AbstractSparseMatrix{T}, B::AbstractSparseMatrix{T},
+                C::StridedMatrix{T};
+                dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR
+) where T
+    check_trans(transa)
+    check_mat_op_sizes(C, A, transa, B, 'N')
+    ldC = stride(C, 2)
+    hA = MKLSparseMatrix(A)
+    hB = MKLSparseMatrix(B)
+    res = mkl_call(Val{:mkl_sparse_T_spmmdI}(), typeof(A),
+                   transa, hA, hB, dense_layout, C, ldC)
+    destroy(hA)
+    destroy(hB)
+    check_status(res)
+    return C
+end
+
+# C := opA(A) * opB(B), where C is sparse
+function sp2m(transA::Char, A::AbstractSparseMatrix{T}, descrA::matrix_descr,
+              transB::Char, B::AbstractSparseMatrix{T}, descrB::matrix_descr) where T
+    check_trans(transA)
+    check_trans(transB)
+    check_mat_op_sizes(nothing, A, transA, B, transB)
+    Cout = Ref{sparse_matrix_t}()
+    hA = MKLSparseMatrix(A)
+    hB = MKLSparseMatrix(B)
+    res = mkl_call(Val{:mkl_sparse_sp2mI}(), typeof(A),
+                   transA, descrA, hA, transB, descrB, hB,
+                   SPARSE_STAGE_FULL_MULT, Cout)
+    destroy(hA)
+    destroy(hB)
+    check_status(res)
+    # NOTE: we are guessing what is the storage format of C
+    return MKLSparseMatrix{typeof(A)}(Cout[])
+end
+
+# C := opA(A) * opB(B), where C is sparse, in-place version
+# C should have the correct size and sparsity pattern
+function sp2m!(transA::Char, A::AbstractSparseMatrix{T}, descrA::matrix_descr,
+               transB::Char, B::AbstractSparseMatrix{T}, descrB::matrix_descr,
+               C::SparseMatrixCSC{T};
+               check_nzpattern::Bool = true
+) where T
+    check_trans(transA)
+    check_trans(transB)
+    check_mat_op_sizes(C, A, transA, B, transB)
+    hA = MKLSparseMatrix(A)
+    hB = MKLSparseMatrix(B)
+    if check_nzpattern
+        # pre-multiply A * B to get the number of nonzeros per column in the result
+        CptnOut = Ref{sparse_matrix_t}()
+        res = mkl_call(Val{:mkl_sparse_sp2mI}(), typeof(A),
+                    transA, descrA, hA, transB, descrB, hB,
+                    SPARSE_STAGE_NNZ_COUNT, CptnOut)
+        check_status(res)
+        hCptn = MKLSparseMatrix{typeof(A)}(CptnOut[])
+        try
+            # check if C has the same per-column nonzeros as the result
+            _C = extract_data(hCptn)
+            _Cnnz = _C.major_starts[end] - 1
+            nnz(C) == _Cnnz || error(lazy"Number of nonzeros in the destination matrix ($(nnz(C))) does not match the result ($(_Cnnz))")
+            C.colptr == _C.major_starts || error(lazy"Nonzeros structure of the destination matrix does not match the result")
+        catch e
+            # destroy handles to A and B if the pattern check fails,
+            # otherwise reuse them at the actual multiplication
+            destroy(hA)
+            destroy(hB)
+            rethrow(e)
+        finally
+            destroy(hCptn)
+        end
+        # FIXME rowval not checked
+    end
+    # FIXME the optimal way would be to create the MKLSparse handle to C reusing its arrays
+    # and do SPARSE_STAGE_FINALIZE_MULT to directly write to the C.nzval
+    # but that causes segfaults when the handle is destroyed
+    # (also the partial mkl_sparse_copy(C) workaround to reuse the nz structure segfaults)
+    #hC = MKLSparseMatrix(C)
+    #hC_ref = Ref(hC)
+    #res = mkl_call(Val{:mkl_sparse_sp2mI}(), typeof(A),
+    #               transA, descrA, hA, transB, descrB, hB,
+    #               SPARSE_STAGE_FINALIZE_MULT, hC_ref)
+    #@assert hC_ref[] == hC
+    # so instead we do the full multiplication and copy the result into C nzvals
+    hCopy_ref = Ref{sparse_matrix_t}()
+    res = mkl_call(Val{:mkl_sparse_sp2mI}(), typeof(A),
+                   transA, descrA, hA, transB, descrB, hB,
+                   SPARSE_STAGE_FULL_MULT, hCopy_ref)
+    destroy(hA)
+    destroy(hB)
+    check_status(res)
+    if hCopy_ref[] != C_NULL
+        hCopy = MKLSparseMatrix{typeof(A)}(hCopy_ref[])
+        copy!(C, hCopy; check_nzpattern)
+        destroy(hCopy)
+    end
+    return C
+end
+
+# C := alpha * opA(A) * opB(B) + beta * C, where C is dense
+function sp2md!(transA::Char, alpha::T, A::AbstractSparseMatrix{T}, descrA::matrix_descr,
+                transB::Char, B::AbstractSparseMatrix{T}, descrB::matrix_descr,
+                beta::T, C::StridedMatrix{T};
+                dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR
+) where T
+    check_trans(transA)
+    check_trans(transB)
+    check_mat_op_sizes(C, A, transA, B, transB)
+    ldC = stride(C, 2)
+    hA = MKLSparseMatrix(A)
+    hB = MKLSparseMatrix(B)
+    res = mkl_call(Val{:mkl_sparse_T_sp2mdI}(), typeof(A),
+                   transA, descrA, hA, transB, descrB, hB,
+                   alpha, beta,
+                   C, dense_layout, ldC)
+    if res != SPARSE_STATUS_SUCCESS
+        @show transA descrA transB descrB
+    end
+    destroy(hA)
+    destroy(hB)
+    check_status(res)
+    return C
+end
+
+# C := A * op(A), or
+# C := op(A) * A, where C is sparse
+# note: only the upper triangular part of C is computed
+function syrk(transA::Char, A::AbstractSparseMatrix{T}) where T
+    check_trans(transA)
+    Cout = Ref{sparse_matrix_t}()
+    hA = MKLSparseMatrix(A)
+    res = mkl_call(Val{:mkl_sparse_syrkI}(), typeof(A),
+                   transA, hA, Cout)
+    destroy(hA)
+    check_status(res)
+    return MKLSparseMatrix(Cout[])
+end
+
+# C := A * op(A), or
+# C := op(A) * A, where C is dense
+# note: only the upper triangular part of C is computed
+function syrkd!(transA::Char, alpha::T, A::AbstractSparseMatrix{T}, beta::T,
+                C::StridedMatrix{T};
+                dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR
+) where T
+    check_trans(transA)
+    check_mat_op_sizes(C, A, transA, A, transA == 'N' ? 'T' : 'N'; dense_layout)
+    ldC = stride(C, 2)
+    hA = MKLSparseMatrix(A)
+    res = mkl_call(Val{:mkl_sparse_T_syrkdI}(), typeof(A),
+                   transA, hA, alpha, beta, C, dense_layout, ldC)
+    destroy(hA)
+    check_status(res)
+    return C
+end
+
+# C := alpha * op(A) * B * A + beta * C, or
+# C := alpha * A * B * op(A) + beta * C, where C is dense
+# note: only the upper triangular part of C is computed
+function syprd!(transA::Char, alpha::T, A::AbstractSparseMatrix{T},
+                B::StridedMatrix{T}, beta::T, C::StridedMatrix{T};
+                dense_layout_B::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR,
+                dense_layout_C::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR
+) where T
+    check_trans(transA)
+    # FIXME dense_layout_B not used
+    check_mat_op_sizes(C, A, transA, B, 'N';
+                       check_result_columns = false, dense_layout = dense_layout_C)
+    check_mat_op_sizes(C, B, 'N', A, transA == 'N' ? 'T' : 'N';
+                       check_result_rows = false, dense_layout = dense_layout_C)
+    ldB = stride(B, 2)
+    ldC = stride(C, 2)
+    hA = MKLSparseMatrix(A)
+    res = mkl_call(Val{:mkl_sparse_T_syprdI}(), typeof(A),
+                   transA, hA, B, dense_layout_B, ldB,
+                   alpha, beta, C, dense_layout_C, ldC)
+    destroy(hA)
+    check_status(res)
+    return C
+end
+
 # find y: op(A) * y = alpha * x
 function trsv!(transA::Char, alpha::T, A::AbstractSparseMatrix{T}, descr::matrix_descr,
                x::StridedVector{T}, y::StridedVector{T}