From 2c6874274657c131c9537daf3eb7f63831d0f424 Mon Sep 17 00:00:00 2001 From: aamijar Date: Sat, 17 Aug 2024 18:43:25 +0000 Subject: [PATCH 01/23] init --- cpp/include/raft/sparse/solver/lanczos.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index 1aa56d6ba2..462adc3703 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -21,6 +21,7 @@ #include #include + namespace raft::sparse::solver { // ========================================================= From 0f5bdcb0dc1d0b31a89d3fef7f7efcf870ccbdd4 Mon Sep 17 00:00:00 2001 From: aamijar Date: Tue, 20 Aug 2024 22:54:59 +0000 Subject: [PATCH 02/23] benchmarking lanczos working --- .../raft/sparse/solver/detail/lanczos.cuh | 1161 +++++++++++++++++ cpp/include/raft/sparse/solver/lanczos.cuh | 18 + 2 files changed, 1179 insertions(+) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 9ecb4b729f..020fa2da0c 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -27,6 +27,50 @@ #include #include #include +// #include + +#include "raft/core/detail/macros.hpp" +#include "raft/core/device_mdspan.hpp" +#include "raft/core/host_mdarray.hpp" +#include "raft/core/host_mdspan.hpp" +#include "raft/core/mdspan_types.hpp" +#include "raft/linalg/detail/add.cuh" +#include "raft/linalg/detail/gemv.hpp" +#include +#include +#include +#include +#include +// #include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// #include +// #include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include @@ -1396,4 +1440,1121 @@ int computeLargestEigenvectors( return status; } + +template +RAFT_KERNEL kernel_subtract_and_scale( + T* u, + T* vec, + T* scalar, + int n +) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < n) { + u[idx] -= (*scalar) * vec[idx]; + } +} + + +template +RAFT_KERNEL kernel_get_last_row( + const T* M, + T* S, + int numRows, + int numCols +) { + int col = threadIdx.x + blockIdx.x * blockDim.x; + // Ensure the thread index is within the matrix width + if (col < numCols) { + // Index in the column-major order matrix + int index = (numRows - 1) + col * numRows; + // Copy the value to the last row array + S[col] = M[index]; + } +} + + +template +RAFT_KERNEL kernel_triangular_populate( + T* M, + const T* beta, + int n +) { + // int row = blockIdx.x * blockDim.x + threadIdx.x; + // if (row < n) { + // // Upper diagonal + // if (row < n - 1) { + // M[row * n + (row + 1)] = beta[row]; + // } + + // // Lower diagonal + // if (row > 0) { + // M[row * n + (row - 1)] = beta[row - 1]; + // } + // } + int row = blockIdx.x * blockDim.x + threadIdx.x; + + if (row < n) { + // Upper diagonal: M[row + 1, row] in column-major + if (row < n - 1) { + M[(row + 1) * n + row] = beta[row]; + } + + // Lower diagonal: M[row - 1, row] in column-major + if (row > 0) { + M[(row - 1) * n + row] = beta[row - 1]; + } + } +} + +template +RAFT_KERNEL kernel_triangular_beta_k( + T* t, + const T* beta_k, + int k, + int n) +{ + // int tid = threadIdx.x + blockIdx.x * blockDim.x; + // if (tid < k) { + // // Update the k-th row + // t[k * n + tid] = beta_k[tid]; + // // Update the k-th column + // t[tid * n + k] = beta_k[tid]; + // } + int tid = threadIdx.x + 
blockIdx.x * blockDim.x; + + if (tid < k) { + // Update the k-th column: t[i, k] -> t[k * n + i] in column-major + t[tid * n + k] = beta_k[tid]; + + // Update the k-th row: t[k, j] -> t[j * n + k] in column-major + t[k * n + tid] = beta_k[tid]; + } +} + +template +RAFT_KERNEL kernel_normalize( + const T* u, + const T* beta, + int j, + int n, + T* v, + T* V, + int size +) { + // FIXME: custom cuda kernel vs raft primitives? + // # Normalize + // _kernel_normalize(u, beta, i, n, v, V) + + // _kernel_normalize = cupy.ElementwiseKernel( + // 'T u, raw S beta, int32 j, int32 n', 'T v, raw T V', + // 'v = u / beta[j]; V[i + (j+1) * n] = v;', 'cupy_eigsh_normalize') + + // v = u / beta[j]; + // V[i + (j+1) * n] = v; + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < size) { + if (beta[j] == 0) { + v[i] = u[i] / 1; + } else { + v[i] = u[i] / beta[j]; + } + V[i+ (j+1) * n] = v[i]; + } +} + + +template +RAFT_KERNEL kernel_clamp_down( + T* value, + T threshold +) +{ + *value = (fabs(*value) < threshold) ? 0 : *value; +} + +template +RAFT_KERNEL kernel_clamp_down_vector( + T* vec, + T threshold, + int size +) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < size) { + vec[idx] = (fabs(vec[idx]) < threshold) ? 0 : vec[idx]; + } +} + +template +void cupy_solve_ritz( + raft::resources const& handle, + raft::device_matrix_view alpha, + raft::device_matrix_view beta, + std::optional> beta_k, + index_type_t k, + int which, + int ncv, + raft::device_matrix_view eigenvectors, + raft::device_vector_view eigenvalues +) +{ + + // # Note: This is done on the CPU, because there is an issue in + // # cupy.linalg.eigh with CUDA 9.2, which can return NaNs. It will has little + // # impact on performance, since the matrix size processed here is not large. + // alpha = cupy.asnumpy(alpha) + // beta = cupy.asnumpy(beta) + // t = numpy.diag(alpha) + // t = t + numpy.diag(beta[:-1], k=1) + // t = t + numpy.diag(beta[:-1], k=-1) + // if beta_k is not None: + // beta_k = cupy.asnumpy(beta_k) + // t[k, :k] = beta_k + // t[:k, k] = beta_k + // w, s = numpy.linalg.eigh(t) + + // # Pick-up k ritz-values and ritz-vectors + // if which == 'LA': + // idx = numpy.argsort(w) + // wk = w[idx[-k:]] + // sk = s[:, idx[-k:]] + // elif which == 'LM': + // idx = numpy.argsort(numpy.absolute(w)) + // wk = w[idx[-k:]] + // sk = s[:, idx[-k:]] + + // elif which == 'SA': + // idx = numpy.argsort(w) + // wk = w[idx[:k]] + // sk = s[:, idx[:k]] + // # elif which == 'SM': #dysfunctional + // # idx = cupy.argsort(abs(w)) + // # wk = w[idx[:k]] + // # sk = s[:,idx[:k]] + // return cupy.array(wk), cupy.array(sk) + + // FIXME: select the deterministic mode handle? + // cusolverStatus_t + // cusolverDnSetDeterministicMode(cusolverDnHandle_t handle, cusolverDeterministicMode_t mode) + + // FIXME: use public raft apis instead of using detail + + // add some primitves to create triangular dense matrix? 
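// The code below assembles the symmetric tridiagonal Lanczos matrix
// T = diag(alpha) + diag(beta[:-1], +1) + diag(beta[:-1], -1) as a dense
// column-major ncv x ncv matrix: fill() zeroes it, initializeDiagonalMatrix()
// writes alpha on the main diagonal, and kernel_triangular_populate writes
// beta onto the two off-diagonals. In column-major storage element (r, c)
// lives at c * ncv + r, so the super-diagonal entry T(r, r+1) is
// M[(r + 1) * ncv + r], which is exactly the indexing used by the kernel above.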
+ auto stream = resource::get_cuda_stream(handle); + + value_type_t zero = 0; + auto triangular_matrix = raft::make_device_matrix(handle, ncv, ncv); + raft::matrix::fill(handle, triangular_matrix.view(), zero); + + raft::matrix::initializeDiagonalMatrix(alpha.data_handle(), triangular_matrix.data_handle(), ncv, ncv, stream); + + // print_device_vector("triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); + + + int blockSize = 256; + int numBlocks = (ncv + blockSize - 1) / blockSize; + kernel_triangular_populate<<>>(triangular_matrix.data_handle(), beta.data_handle(), ncv); + + // if beta_k is not None: + // beta_k = cupy.asnumpy(beta_k) + // t[k, :k] = beta_k + // t[:k, k] = beta_k + + + if (beta_k) { + int threadsPerBlock = 256; + int blocksPerGrid = (k + threadsPerBlock - 1) / threadsPerBlock; + kernel_triangular_beta_k<<>>(triangular_matrix.data_handle(), beta_k.value().data_handle(), (int)k, ncv); + } + + + + // print_device_vector("ritz triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); + + auto triangular_matrix_view = raft::make_device_matrix_view(triangular_matrix.data_handle(), ncv, ncv); + + // print_device_vector("triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); + + // raft::linalg::eig_jacobi(handle, triangular_matrix_view, eigenvectors, eigenvalues, zero); + // Lapack::steqr() + raft::linalg::eig_dc(handle, triangular_matrix_view, eigenvectors, eigenvalues); +} + + +template +void cupy_aux( + raft::resources const& handle, + spectral::matrix::sparse_matrix_t const* A, + raft::device_matrix_view V, + raft::device_matrix_view u, + raft::device_matrix_view alpha, + raft::device_matrix_view beta, + int start_idx, + int end_idx, + int ncv, + raft::device_matrix_view v, + raft::device_matrix_view uu, + raft::device_matrix_view vv +) +{ + auto stream = resource::get_cuda_stream(handle); + + int n = A->nrows_; + // std::cout << std::fixed << std::setprecision(7); // Set precision to 10 decimal places + //int i = 0; + + // int b = 0; + // int one = 1; + // int zero = 0; + // int mone = -1; + + // auto V_const = raft::make_device_matrix_view(V.data_handle(), ncv, n); + + + // v[...] 
= V[i_start] + raft::copy(v.data_handle(), &(V(start_idx, 0)), n, stream); + // auto mp = raft::make_device_vector(handle, 1); + // raft::matrix::fill(handle, mp.view(), start_idx); + // auto mp_const = raft::make_device_vector_view(mp.data_handle(), 1); + // auto v_view = raft::make_device_matrix_view(v.data_handle(), 1, n); + + // raft::matrix::gather(handle, V_const, mp_const, v_view); + + std::cout << start_idx << " " << end_idx << std::endl; + // print_device_vector("V", V.data_handle(), n*ncv, std::cout); + // print_device_vector("u", u.data_handle(), n, std::cout); + // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); + // print_device_vector("beta", beta.data_handle(), ncv, std::cout); + // print_device_vector("v", v.data_handle(), n, std::cout); + // print_device_vector("uu", v.data_handle(), n, std::cout); + // print_device_vector("vv", v.data_handle(), n, std::cout); + + + + // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); + + + auto cusparse_h = resource::get_cusparse_handle(handle); + cusparseSpMatDescr_t cusparse_A; + raft::sparse::detail::cusparsecreatecsr(&cusparse_A, + A->nrows_, + A->ncols_, + A->nnz_, + const_cast(A->row_offsets_), + const_cast(A->col_indices_), + const_cast(A->values_)); + + cusparseDnVecDescr_t cusparse_v; + cusparseDnVecDescr_t cusparse_u; + raft::sparse::detail::cusparsecreatednvec(&cusparse_v, n, v.data_handle()); + raft::sparse::detail::cusparsecreatednvec(&cusparse_u, n, u.data_handle()); + + // if (start_idx == 0) { + // print_device_vector("spmv v", v.data_handle(), n, std::cout); + // print_device_vector("spmv u", u.data_handle(), n, std::cout); + // } + + value_type_t one = 1; + value_type_t zero = 0; + size_t bufferSize; + raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, cusparse_A, cusparse_v, &zero, cusparse_u, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize, stream); + auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); + + // LOOP + for (int i = start_idx; i < end_idx; i++) { + raft::sparse::detail::cusparsespmv(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, cusparse_A, cusparse_v, &zero, cusparse_u, CUSPARSE_SPMV_ALG_DEFAULT, cusparse_spmv_buffer.data_handle(), stream); + + // if (start_idx == 0 && i == 0) { + // print_device_vector("u spmv", u.data_handle(), n, std::cout); + // } + // print_device_vector("u spmv", u.data_handle(), n, std::cout); + + // # Call dotc: alpha[i] = v.conj().T @ u + // _cublas.setPointerMode( + // cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) + // try: + // dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1, + // alpha.data.ptr + i * alpha.itemsize) + // finally: + // _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) + + // conjugate is only for complex numbers + // we should only have real numbers + + // FIXME: loop index + auto alpha_i = raft::make_device_scalar_view(&alpha(0, i)); + auto v_vector = raft::make_device_vector_view(v.data_handle(), n); + auto u_vector = raft::make_device_vector_view(u.data_handle(), n); + raft::linalg::dot(handle, v_vector, u_vector, alpha_i); + + // print_device_vector("alpha[i]", &alpha(0, i), 1, std::cout); + + // # Orthogonalize: u = u - alpha[i] * v - beta[i - 1] * V[i - 1] + // vv.fill(0) + // b[...] 
= beta[i - 1] # cast from real to complex + // print("vv", vv) + // print("b", b, "beta[i-1]", beta[i-1]) + // _cublas.setPointerMode( + // cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) + // try: + // axpy(cublas_handle, n, + // alpha.data.ptr + i * alpha.itemsize, + // v.data.ptr, 1, vv.data.ptr, 1) + // axpy(cublas_handle, n, + // b.data.ptr, + // V[i - 1].data.ptr, 1, vv.data.ptr, 1) + // finally: + // _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) + // axpy(cublas_handle, n, + // mone.ctypes.data, + // vv.data.ptr, 1, u.data.ptr, 1) + // FIXME: beta(0, i-1) + raft::matrix::fill(handle, vv, zero); + // raft::device_scalar_view beta_view = make_device_scalar_view(&beta(0, 0)); + //value_type_t scalar; + //raft::copy(&scalar, &beta(0, 0), 1, stream); + // const value_type_t scalar_const = scalar; + + // auto b = raft::make_device_scalar(handle, scalar_const); + + auto cublas_h = resource::get_cublas_handle(handle); + + value_type_t alpha_i_host = 0; + value_type_t b = 0; + value_type_t mone = -1; + + // FIXME: alpha(0, i) + raft::copy(&b, &beta(0, (i - 1 + ncv) % ncv), 1, stream); + raft::copy(&alpha_i_host, &(alpha(0, i)), 1, stream); + + // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); + + raft::linalg::detail::cublasaxpy(cublas_h, n, &alpha_i_host, v.data_handle(), 1, vv.data_handle(), 1, stream); + // // FIXME: &V(i, 0) + // std::cout << "got here axpy" << std::endl; + raft::linalg::detail::cublasaxpy(cublas_h, n, &b, &V((i - 1 + ncv) % ncv, 0), 1, vv.data_handle(), 1, stream); + // std::cout << "got here axpy" << std::endl; + + raft::linalg::detail::cublasaxpy(cublas_h, n, &mone, vv.data_handle(), 1, u.data_handle(), 1, stream); + + // if (start_idx == 7 && i == 7) { + // print_device_vector("axpy u", u.data_handle(), n, std::cout); + // } + + + // std::cout << "got here axpy" << std::endl; + + // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); + // std::cout << "got here axpy" << std::endl; + + // print_device_vector("ortho u", u.data_handle(), n, std::cout); + + + // # Reorthogonalize: u -= V @ (V.conj().T @ u) + // gemv(cublas_handle, _cublas.CUBLAS_OP_C, + // n, i + 1, + // one.ctypes.data, V.data.ptr, n, + // u.data.ptr, 1, + // zero.ctypes.data, uu.data.ptr, 1) + // gemv(cublas_handle, _cublas.CUBLAS_OP_N, + // n, i + 1, + // mone.ctypes.data, V.data.ptr, n, + // uu.data.ptr, 1, + // one.ctypes.data, u.data.ptr, 1) + // alpha[i] += uu[i] + + // Are we transposing because of row-major to column-major since gemv requires column-major + + // ncv * n * + // std::cout << i << std::endl; + // if (start_idx == 7 && end_idx == 38 && i == 7) { + // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); + // print_device_vector("ortho u", u.data_handle(), n, std::cout); + // } + // if (start_idx == 0 && end_idx == 38 && i == 0) { + // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); + // print_device_vector("ortho u", u.data_handle(), n, std::cout); + // } + + raft::linalg::detail::cublasgemv(cublas_h, + CUBLAS_OP_T, + n, + i + 1, + &one, + V.data_handle(), + n, + u.data_handle(), + 1, + &zero, + uu.data_handle(), + 1, + stream); + + raft::linalg::detail::cublasgemv(cublas_h, + CUBLAS_OP_N, + n, + i + 1, + &mone, + V.data_handle(), + n, + uu.data_handle(), + 1, + &one, + u.data_handle(), + 1, + stream); + + auto uu_i = raft::make_device_scalar_view(&uu(0, i)); + raft::linalg::add(handle, make_const_mdspan(alpha_i), make_const_mdspan(uu_i), alpha_i); + + + // flush alpha + kernel_clamp_down<<<1, 
1>>>(alpha_i.data_handle(), static_cast(1e-9)); + + // print_device_vector("gemv uu[i]", &uu(0, i), 1, std::cout); + // print_device_vector("gemv alpha[i]", &alpha(0, i), 1, std::cout); + // print_device_vector("gemv u", u.data_handle(), n, std::cout); + + + // FIXME: pointer mode for alpha beta? + // # Call nrm2 + // _cublas.setPointerMode( + // cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) + // try: + // nrm2(cublas_handle, n, u.data.ptr, 1, + // beta.data.ptr + i * beta.itemsize) + // finally: + // _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) + + raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_DEVICE, stream); + raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &beta(0, i), stream); + // print_device_vector("nrm2 beta[i]", &beta(0, i), 1, std::cout); + raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream); + + + int blockSize = 256; + int numBlocks = (n + blockSize - 1) / blockSize; + + kernel_clamp_down_vector<<>>(u.data_handle(), static_cast(1e-7), n); + + + kernel_clamp_down<<<1, 1>>>(&beta(0, i), static_cast(1e-6)); + + // FIXME: + // # Break here as the normalization below touches V[i+1] + // if i >= i_end - 1: + // break + if (i >= end_idx - 1) { + break; + } + + + // FIXME: custom cuda kernel vs raft primitives? + // # Normalize + // _kernel_normalize(u, beta, i, n, v, V) + + // _kernel_normalize = cupy.ElementwiseKernel( + // 'T u, raw S beta, int32 j, int32 n', 'T v, raw T V', + // 'v = u / beta[j]; V[i + (j+1) * n] = v;', 'cupy_eigsh_normalize') + + // v = u / beta[j]; + // V[i + (j+1) * n] = v; + + int threadsPerBlock = 256; + int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; + + kernel_normalize<<>>(u.data_handle(), beta.data_handle(), i, n, v.data_handle(), V.data_handle(), n); + + // print_device_vector("kernal normalize v", v.data_handle(), n, std::cout); + // print_device_vector("kernal normalize V", V.data_handle(), n*ncv, std::cout); + + // raft::linalg::unary_op(handle,u, v, + // [device_scalar = beta(0, i)] __device__(auto y) { + // return y / *device_scalar; + // }); + + // raft::copy(&V(i + (j+1) * n, 0), v.data_handle(), n, stream); + } +} + + +template +int cupy_smallest( + raft::resources const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + value_type_t* eigVals_dev, + value_type_t* eigVecs_dev, + value_type_t* v0, + uint64_t seed +) +{ + // std::cout << "hello cupy smallest " << A->nrows_ << " " << A->ncols_ << " " << A->nnz_ << std::endl; + + int n = A->nrows_; + int ncv = restartIter; + // raft::print_device_vector("hello cupy v0 init", v0, n, std::cout); + auto stream = resource::get_cuda_stream(handle); + + std::cout << std::fixed << std::setprecision(7); // Set precision to 10 decimal places + + + // print_device_vector("v0_cpp", v0, n, std::cout); + + // u = v0 + // V[0] = v0 / cublas.nrm2(v0) + raft::device_matrix V = raft::make_device_matrix(handle, ncv, n); + raft::device_matrix_view V_0_view = raft::make_device_matrix_view(V.data_handle(), 1, n); // First Row V[0] + raft::device_matrix_view v0_view = raft::make_device_matrix_view(v0, 1, n); + // raft::linalg::row_normalize(handle, v0_view, V_0_view, raft::linalg::L2Norm); + + raft::device_matrix u = raft::make_device_matrix(handle, 1, n); + raft::copy(u.data_handle(), v0, n, stream); + + auto cublas_h = resource::get_cublas_handle(handle); + value_type_t v0nrm = 0; + 
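// Start-vector normalization, V[0] = v0 / ||v0||_2: the nrm2 below reduces on
// the device and returns the norm into this host variable (host pointer mode);
// the value is then staged back into a device scalar so the elementwise
// division can run on the device inside unary_op.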
raft::linalg::detail::cublasnrm2(cublas_h, n, v0_view.data_handle(), 1, &v0nrm, stream); + // std::cout << "v0nrm " << v0nrm << std::endl; + + raft::device_scalar v0nrm_scalar = raft::make_device_scalar(handle, v0nrm); + + raft::device_vector_view v0_vector_const = raft::make_device_vector_view(v0, n); + // raft::device_vector_view v0_vector = raft::make_device_vector_view(v0, n); + + raft::linalg::unary_op(handle, v0_vector_const, V_0_view, [device_scalar = v0nrm_scalar.data_handle()] __device__(auto y) { + return y / *device_scalar; + }); + + // print_device_vector("V[0]", V_0_view.data_handle(), n, std::cout); + + // print_device_vector("V[0]", V.data_handle(), n, std::cout); + + raft::device_matrix alpha = raft::make_device_matrix(handle, 1, ncv); + raft::device_matrix beta = raft::make_device_matrix(handle, 1, ncv); + value_type_t zero = 0; + raft::matrix::fill(handle, alpha.view(), zero); + raft::matrix::fill(handle, beta.view(), zero); + + // start allocating for cupy_lanczos_fast() + + // cusparse_handle = None + // if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): + // cusparse_handle = device.get_cusparse_handle() + // spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE + // spmv_alpha = numpy.array(1.0, A.dtype) + // spmv_beta = numpy.array(0.0, A.dtype) + // spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) + // spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT + + // v = cupy.empty((n,), dtype=A.dtype) + // uu = cupy.empty((ncv,), dtype=A.dtype) + // vv = cupy.empty((n,), dtype=A.dtype) + // b = cupy.empty((), dtype=A.dtype) + // one = numpy.array(1.0, dtype=A.dtype) + // zero = numpy.array(0.0, dtype=A.dtype) + // mone = numpy.array(-1.0, dtype=A.dtype) + + raft::device_matrix v = raft::make_device_matrix(handle, 1, n); + raft::device_matrix aux_uu = raft::make_device_matrix(handle, 1, ncv); + raft::device_matrix vv = raft::make_device_matrix(handle, 1, n); + + // cupy_aux(A, V.view(), u_view, alpha.view(), beta.view()); + cupy_aux(handle, A, V.view(), u.view(), alpha.view(), beta.view(), 0, ncv, ncv, v.view(), aux_uu.view(), vv.view()); + + + + // # Lanczos iteration + // lanczos(a, V, u, alpha, beta, 0, ncv) + + // iter = ncv + // w, s = _eigsh_solve_ritz(alpha, beta, None, k, which) + // x = V.T @ s + + // # Compute residual + // beta_k = beta[-1] * s[-1, :] + // res = cublas.nrm2(beta_k) + + // uu = cupy.empty((k,), dtype=a.dtype) + auto eigenvectors = raft::make_device_matrix(handle, ncv, ncv); + auto eigenvalues = raft::make_device_vector(handle, ncv); + + cupy_solve_ritz(handle, alpha.view(), beta.view(), std::nullopt, nEigVecs, 0, ncv, eigenvectors.view(), eigenvalues.view()); + // print_device_vector("V", V.data_handle(), n*ncv, std::cout); + // print_device_vector("u", u.data_handle(), n, std::cout); + // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); + // print_device_vector("beta", beta.data_handle(), ncv, std::cout); + // print_device_vector("v", v.data_handle(), n, std::cout); + + auto eigenvectors_k = raft::make_device_matrix_view(eigenvectors.data_handle(), ncv, nEigVecs); + raft::device_vector_view eigenvalues_k = raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); + + // print_device_vector("eigenvectors", eigenvectors_k.data_handle(), nEigVecs*ncv, std::cout); + // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); + + + // x = V.T @ s + + // ncv*n x ncv*nEigVecs + + auto ritz_eigenvectors = raft::make_device_matrix_view(eigVecs_dev, n, nEigVecs); + + + auto V_T = 
raft::make_device_matrix_view(V.data_handle(), n, ncv); + raft::linalg::gemm(handle, V_T, eigenvectors_k, ritz_eigenvectors); + + // print_device_vector("ritz_eigenvectors", ritz_eigenvectors.data_handle(), n*nEigVecs, std::cout); + + + // # Compute residual + // beta_k = beta[-1] * s[-1, :] + // res = cublas.nrm2(beta_k) + + // FIXME: raft::linalg::map_offset() + // Define grid and block sizes + int blockSize = 256; // Number of threads per block + int numBlocks = (nEigVecs + blockSize - 1) / blockSize; + + auto s = raft::make_device_vector(handle, nEigVecs); + kernel_get_last_row<<>>(eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); + + // print_device_vector("s_new[-1, :]", s.data_handle(), nEigVecs, std::cout); + + + + auto beta_k = raft::make_device_vector(handle, nEigVecs); + raft::matrix::fill(handle, beta_k.view(), zero); + // auto s = raft::make_device_vector_view(&eigenvectors_k(ncv - 1, 0), nEigVecs); + auto beta_scalar = raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); + + raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); + + //auto cublas_h = resource::get_cublas_handle(handle); + value_type_t res = 0; + raft::linalg::detail::cublasnrm2(cublas_h, nEigVecs, beta_k.data_handle(), 1, &res, stream); + + // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); + // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); + + //print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); + // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); + // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); + // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); + std::cout << "res " << res << std::endl; + + + // uu = cupy.empty((k,), dtype=a.dtype) + + // while res > tol and iter < maxiter: + // # Setup for thick-restart + // beta[:k] = 0 + // alpha[:k] = w + // V[:k] = x.T + + // # u -= u.T @ V[:k].conj().T @ V[:k] + // cublas.gemv(_cublas.CUBLAS_OP_C, 1, V[:k].T, u, 0, uu) + // cublas.gemv(_cublas.CUBLAS_OP_N, -1, V[:k].T, uu, 1, u) + // V[k] = u / cublas.nrm2(u) + + // u[...] 
= a @ V[k] + // cublas.dotc(V[k], u, out=alpha[k]) + // u -= alpha[k] * V[k] + // u -= V[:k].T @ beta_k + // cublas.nrm2(u, out=beta[k]) + // V[k+1] = u / beta[k] + + // # Lanczos iteration + // lanczos(a, V, u, alpha, beta, k + 1, ncv) + + // iter += ncv - k + // w, s = _eigsh_solve_ritz(alpha, beta, beta_k, k, which) + // x = V.T @ s + + // # Compute residual + // beta_k = beta[-1] * s[-1, :] + // res = cublas.nrm2(beta_k) + + // print(iter, w, res) + + + auto uu = raft::make_device_matrix(handle, 0, nEigVecs); + int iter = ncv; + while (res > tol && iter < maxIter) { + // setup for thick-restart + // beta[:k] = 0 + auto beta_view = raft::make_device_matrix_view(beta.data_handle(), 1, nEigVecs); + raft::matrix::fill(handle, beta_view, zero); + // alpha[:k] = w + raft::copy(alpha.data_handle(), eigenvalues_k.data_handle(), nEigVecs, stream); + // V[:k] = x.T + + // auto x_T = raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); + // auto V_k_view = raft::make_device_matrix_view(V.data_handle(), nEigVecs, n); + + + // auto x_T = raft::make_device_matrix(handle, nEigVecs, n); + auto x_T = raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); + + // raft::linalg::transpose(handle, ritz_eigenvectors, x_T.view()); + raft::copy(V.data_handle(), x_T.data_handle(), nEigVecs * n, stream); + + // print_device_vector("V[:k]", V.data_handle(), nEigVecs * n, std::cout); + + + // FIXME: manually multiply eigenvectors by -1 to see if that fixes anything + // 0, 1, 2, 5 + // auto V_zero = raft::make_device_vector_view(V.data_handle(), n); + // auto V_one = raft::make_device_vector_view(&((V.view()(1, 0))), n); + // auto V_two = raft::make_device_vector_view(&((V.view()(2, 0))), n); + // auto V_five = raft::make_device_vector_view(&((V.view()(5, 0))), n); + + // auto minusone = raft::make_host_scalar(-1); + + // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_zero), V_zero, minusone.view()); + // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_one), V_one, minusone.view()); + // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_two), V_two, minusone.view()); + // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_five), V_five, minusone.view()); + + + value_type_t one = 1; + value_type_t mone = -1; + // # u -= u.T @ V[:k].conj().T @ V[:k] + // cublas.gemv(_cublas.CUBLAS_OP_C, 1, V[:k].T, u, 0, uu) + // cublas.gemv(_cublas.CUBLAS_OP_N, -1, V[:k].T, uu, 1, u) + // V[k] = u / cublas.nrm2(u) + + // FIXME: uu is too small? 
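// Thick restart: V[:k] now holds the Ritz vectors, so u has to be
// re-orthogonalized against them before it can be normalized into V[k].
// The two gemv calls below are intended to implement the projection
//   uu = V[:k] @ u
//   u  = u - V[:k].T @ uu
// mirroring the CuPy reference quoted above. Note that uu was allocated with
// extent 0 x nEigVecs; it needs room for nEigVecs entries, which is
// presumably what this FIXME is about.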
+ + raft::linalg::detail::cublasgemv(cublas_h, + CUBLAS_OP_T, + nEigVecs, + n, + &one, + V.data_handle(), + nEigVecs, + u.data_handle(), + 1, + &zero, + uu.data_handle(), + 1, + stream); + + raft::linalg::detail::cublasgemv(cublas_h, + CUBLAS_OP_N, + nEigVecs, + n, + &mone, + V.data_handle(), + nEigVecs, + uu.data_handle(), + 1, + &one, + u.data_handle(), + 1, + stream); + + + // V[k] = u / cublas.nrm2(u) + raft::device_matrix_view V_0_view = raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); // Row V[k] + // auto cublas_h = resource::get_cublas_handle(handle); + value_type_t unrm = 0; + raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &unrm, stream); + // std::cout << "v0nrm " << v0nrm << std::endl; + + raft::device_scalar unrm_scalar = raft::make_device_scalar(handle, unrm); + + raft::device_vector_view u_vector_const = raft::make_device_vector_view(u.data_handle(), n); + // raft::device_vector_view u_vector = raft::make_device_vector_view(u.data_handle(), n); + + raft::linalg::unary_op(handle, u_vector_const, V_0_view, [device_scalar = unrm_scalar.data_handle()] __device__(auto y) { + return y / *device_scalar; + }); + + + // raft::device_matrix_view V_0_view = raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); // Row V[k] + // raft::linalg::row_normalize(handle, raft::make_const_mdspan(u.view()), V_0_view, raft::linalg::L2Norm); + // print_device_vector("V[k]", V_0_view.data_handle(), n, std::cout); + + // u[...] = a @ V[k] + // cublas.dotc(V[k], u, out=alpha[k]) + // u -= alpha[k] * V[k] + // u -= V[:k].T @ beta_k + // cublas.nrm2(u, out=beta[k]) + // V[k+1] = u / beta[k] + + auto cusparse_h = resource::get_cusparse_handle(handle); + cusparseSpMatDescr_t cusparse_A; + raft::sparse::detail::cusparsecreatecsr(&cusparse_A, + A->nrows_, + A->ncols_, + A->nnz_, + const_cast(A->row_offsets_), + const_cast(A->col_indices_), + const_cast(A->values_)); + + cusparseDnVecDescr_t cusparse_v; + cusparseDnVecDescr_t cusparse_u; + raft::sparse::detail::cusparsecreatednvec(&cusparse_v, n, V_0_view.data_handle()); + raft::sparse::detail::cusparsecreatednvec(&cusparse_u, n, u.data_handle()); + + // value_type_t one = 1; + value_type_t zero = 0; + size_t bufferSize; + raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, cusparse_A, cusparse_v, &zero, cusparse_u, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize, stream); + auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); + + raft::sparse::detail::cusparsespmv(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, cusparse_A, cusparse_v, &zero, cusparse_u, CUSPARSE_SPMV_ALG_DEFAULT, cusparse_spmv_buffer.data_handle(), stream); + + // print_device_vector("u spmv", u.data_handle(), n, std::cout); + + // auto alpha_i = raft::make_device_scalar_view(&alpha(0, i)); + // auto v_vector = raft::make_device_vector_view(v.data_handle(), n); + // auto u_vector = raft::make_device_vector_view(u.data_handle(), n); + // raft::linalg::dot(handle, v_vector, u_vector, alpha_i); + + + auto alpha_k = raft::make_device_scalar_view(&((alpha.view())(0, nEigVecs))); + auto V_0_view_vector = raft::make_device_vector_view(V_0_view.data_handle(), n); + auto u_view_vector = raft::make_device_vector_view(u.data_handle(), n); + + raft::linalg::dot(handle, V_0_view_vector, u_view_vector, alpha_k); + + // raft::linalg::multiply_scalar(handle, V_0_view, u.view()); + // raft::linalg::unary_op(handle, V_0_view, u.view(), [device_scalar = alpha_k.data_handle()] __device__(auto y) { + // 
return y * (*device_scalar); + // }); + int threadsPerBlock = 256; + int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; + // kernel_subtract_and_scale<<>>(u.data_handle(), a, a, n); + kernel_subtract_and_scale<<>>(u.data_handle(), V_0_view.data_handle(), alpha_k.data_handle(), n); + + // print_device_vector("u subtract and scale", u.data_handle(), n, std::cout); + + // u -= V[:k].T @ beta_k + // cublas.nrm2(u, out=beta[k]) + // V[k+1] = u / beta[k] + + auto temp = raft::make_device_vector(handle, n); + + // print_device_vector("temp", temp.data_handle(), n, std::cout); + + auto V_k = raft::make_device_matrix_view(V.data_handle(), nEigVecs, n); + auto V_k_T = raft::make_device_matrix(handle, n, nEigVecs); + + // print_device_vector("V_k", V_k.data_handle(), nEigVecs*n, std::cout); + + raft::linalg::transpose(handle, V_k, V_k_T.view()); + + // print_device_vector("V_k_T", V_k_T.data_handle(), nEigVecs*n, std::cout); + + + // (n, nEigVecs) x (nEigVecs) + + // auto beta_k_vector = raft::make_device_vector_view(beta_k.data_handle(), nEigVecs); + + // raft::linalg::gemv(handle, make_const_mdspan(V_k_T.view()), beta_k_vector, temp.view()); + + + // FIXME: build small test case for cublasgemv + value_type_t three = 3; + value_type_t two = 2; + + std::vector M = {1, 2, 3, 4, 5, 6}; + std::vector vec = {1, 1}; + + auto M_dev = raft::make_device_matrix(handle, 2, 3); + auto vec_dev = raft::make_device_vector(handle, 2); + auto out = raft::make_device_vector(handle, 3); + raft::copy(M_dev.data_handle(), M.data(), 6, stream); + raft::copy(vec_dev.data_handle(), vec.data(), 2, stream); + // raft::linalg::detail::cublasgemv(cublas_h, + // CUBLAS_OP_N, + // three, + // two, + // &myone, + // M_dev.data_handle(), + // two, + // vec_dev.data_handle(), + // 1, + // &myzero, + // out.data_handle(), + // 1, + // stream); + + raft::linalg::detail::cublasgemv(cublas_h, + CUBLAS_OP_N, + three, + two, + &one, + M_dev.data_handle(), + three, + vec_dev.data_handle(), + 1, + &zero, + out.data_handle(), + 1, + stream); + + // print_device_vector("out", out.data_handle(), 3, std::cout); + + + + raft::linalg::detail::cublasgemv(cublas_h, + CUBLAS_OP_N, + n, + nEigVecs, + &one, + V_k.data_handle(), + n, + beta_k.data_handle(), + 1, + &zero, + temp.data_handle(), + 1, + stream); + + auto one_scalar = raft::make_device_scalar(handle,1); + kernel_subtract_and_scale<<>>(u.data_handle(), temp.data_handle(), one_scalar.data_handle(), n); + + // print_device_vector("V", V.data_handle(), nEigVecs*n, std::cout); + // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); + + // print_device_vector("temp", temp.data_handle(), n, std::cout); + // print_device_vector("u subtract and scale", u.data_handle(), n, std::cout); + + + raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_DEVICE, stream); + raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &((beta.view())(0, nEigVecs)), stream); + // print_device_vector("nrm2 u", &((beta.view())(0, nEigVecs)), 1, std::cout); + raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream); + + auto V_kplus1 = raft::make_device_vector_view(&(V.view()(nEigVecs + 1, 0)), n); + auto u_vector = raft::make_device_vector_view(u.data_handle(), n); + + raft::linalg::unary_op(handle, u_vector, V_kplus1, [device_scalar = &((beta.view())(0, nEigVecs))] __device__(auto y) { + return y / *device_scalar; + }); + + // print_device_vector("V[k+1]", V_kplus1.data_handle(), n, std::cout); + + // # Lanczos iteration + 
// lanczos(a, V, u, alpha, beta, k + 1, ncv) + + // iter += ncv - k + // w, s = _eigsh_solve_ritz(alpha, beta, beta_k, k, which) + // x = V.T @ s + + // # Compute residual + // beta_k = beta[-1] * s[-1, :] + // res = cublas.nrm2(beta_k) + // print_device_vector("before alpha.view", alpha.data_handle(), ncv, std::cout); + // print_device_vector("before beta.view", beta.data_handle(), ncv, std::cout); + + // print_device_vector("V", V.data_handle(), n*ncv, std::cout); + // print_device_vector("u", u.data_handle(), n, std::cout); + // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); + // print_device_vector("beta", beta.data_handle(), ncv, std::cout); + // print_device_vector("v", v.data_handle(), n, std::cout); + + cupy_aux(handle, A, V.view(), u.view(), alpha.view(), beta.view(), nEigVecs + 1, ncv, ncv, v.view(), aux_uu.view(), vv.view()); + // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); + // print_device_vector("beta", beta.data_handle(), ncv, std::cout); + // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); + iter += ncv - nEigVecs; + cupy_solve_ritz(handle, alpha.view(), beta.view(), beta_k.view(), nEigVecs, 0, ncv, eigenvectors.view(), eigenvalues.view()); + auto eigenvectors_k = raft::make_device_matrix_view(eigenvectors.data_handle(), ncv, nEigVecs); + // raft::device_vector_view eigenvalues_k = raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); + + // print_device_vector("eigenvectors", eigenvectors_k.data_handle(), nEigVecs*ncv, std::cout); + // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); + + + // x = V.T @ s + + // ncv*n x ncv*nEigVecs + + auto ritz_eigenvectors = raft::make_device_matrix_view(eigVecs_dev, n, nEigVecs); + + + auto V_T = raft::make_device_matrix_view(V.data_handle(), n, ncv); + raft::linalg::gemm(handle, V_T, eigenvectors_k, ritz_eigenvectors); + + // print_device_vector("ritz_eigenvectors", ritz_eigenvectors.data_handle(), n*nEigVecs, std::cout); + + + // # Compute residual + // beta_k = beta[-1] * s[-1, :] + // res = cublas.nrm2(beta_k) + + // FIXME: raft::linalg::map_offset() + // Define grid and block sizes + int blockSize = 256; // Number of threads per block + int numBlocks = (nEigVecs + blockSize - 1) / blockSize; + + auto s = raft::make_device_vector(handle, nEigVecs); + kernel_get_last_row<<>>(eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); + + // print_device_vector("eigenvectors", eigenvectors.data_handle(), ncv*ncv, std::cout); + // print_device_vector("s_new[-1, :]", s.data_handle(), nEigVecs, std::cout); + + + + //auto beta_k = raft::make_device_vector(handle, nEigVecs); + raft::matrix::fill(handle, beta_k.view(), zero); + // auto s = raft::make_device_vector_view(&eigenvectors_k(ncv - 1, 0), nEigVecs); + auto beta_scalar = raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); + // print_device_vector("beta[-1]", beta_scalar.data_handle(), 1, std::cout); + + raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); + + auto cublas_h = resource::get_cublas_handle(handle); + // value_type_t res = 0; + // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); + // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); + // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); + raft::linalg::detail::cublasnrm2(cublas_h, nEigVecs, beta_k.data_handle(), 1, &res, stream); + + // print_device_vector("s[-1, :]", 
s.data_handle(), nEigVecs, std::cout); + // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); + + // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); + std::cout << "res " << res << " " << iter << std::endl; + // break; + + } + + // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); + raft::copy(eigVals_dev, eigenvalues_k.data_handle(), nEigVecs, stream); + raft::copy(eigVecs_dev, ritz_eigenvectors.data_handle(), n*nEigVecs, stream); + + return 0; +} + +template +struct lanczos_solver_config { + int n_components; + int max_iterations; + int ncv; + ValueTypeT tolerance; + uint64_t seed; +}; + + +template +auto lanczos_compute_smallest_eigenvectors( + raft::resources const& handle, + raft::spectral::matrix::sparse_matrix_t const& A, + lanczos_solver_config const& config, + raft::device_vector_view v0, + raft::device_vector_view eigenvalues, + raft::device_matrix_view eigenvectors +) -> int +{ + return cupy_smallest(handle, &A, config.n_components, config.max_iterations, config.ncv, config.tolerance, eigenvalues.data_handle(), eigenvectors.data_handle(), v0.data_handle(), config.seed); +} + + } // namespace raft::sparse::solver::detail diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index 462adc3703..2a72a0ae72 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -28,6 +28,24 @@ namespace raft::sparse::solver { // Eigensolver // ========================================================= +using detail::lanczos_solver_config; + + +template +auto lanczos_compute_smallest_eigenvectors( + raft::resources const& handle, + raft::spectral::matrix::sparse_matrix_t const& A, + lanczos_solver_config const& config, + raft::device_vector_view v0, + raft::device_vector_view eigenvalues, + raft::device_matrix_view eigenvectors +) -> int +{ + return detail::lanczos_compute_smallest_eigenvectors(handle, A, config, v0, eigenvalues, eigenvectors); +} + + + /** * @brief Compute smallest eigenvectors of symmetric matrix * Computes eigenvalues and eigenvectors that are least From 5ee60cbd0911c61b247442d688ce7914ad5dd376 Mon Sep 17 00:00:00 2001 From: aamijar Date: Wed, 21 Aug 2024 00:45:54 +0000 Subject: [PATCH 03/23] format style --- .../raft/sparse/solver/detail/lanczos.cuh | 800 ++++++++++-------- cpp/include/raft/sparse/solver/lanczos.cuh | 14 +- 2 files changed, 433 insertions(+), 381 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 020fa2da0c..c96d6f1bfd 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -29,13 +29,13 @@ #include // #include -#include "raft/core/detail/macros.hpp" -#include "raft/core/device_mdspan.hpp" -#include "raft/core/host_mdarray.hpp" -#include "raft/core/host_mdspan.hpp" -#include "raft/core/mdspan_types.hpp" -#include "raft/linalg/detail/add.cuh" -#include "raft/linalg/detail/gemv.hpp" +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -45,38 +45,36 @@ #include #include +#include #include #include -#include #include +#include #include -#include #include // #include // #include +#include +#include #include #include #include -#include - -#include #include + +#include + #include +#include #include #include + #include +#include #include #include #include #include - - -#include - -#include - 
-#include #include namespace raft::sparse::solver::detail { @@ -1440,45 +1438,29 @@ int computeLargestEigenvectors( return status; } - -template -RAFT_KERNEL kernel_subtract_and_scale( - T* u, - T* vec, - T* scalar, - int n -) { +template +RAFT_KERNEL kernel_subtract_and_scale(T* u, T* vec, T* scalar, int n) +{ int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < n) { - u[idx] -= (*scalar) * vec[idx]; - } + if (idx < n) { u[idx] -= (*scalar) * vec[idx]; } } - template -RAFT_KERNEL kernel_get_last_row( - const T* M, - T* S, - int numRows, - int numCols -) { +RAFT_KERNEL kernel_get_last_row(const T* M, T* S, int numRows, int numCols) +{ int col = threadIdx.x + blockIdx.x * blockDim.x; // Ensure the thread index is within the matrix width if (col < numCols) { - // Index in the column-major order matrix - int index = (numRows - 1) + col * numRows; - // Copy the value to the last row array - S[col] = M[index]; + // Index in the column-major order matrix + int index = (numRows - 1) + col * numRows; + // Copy the value to the last row array + S[col] = M[index]; } } - template -RAFT_KERNEL kernel_triangular_populate( - T* M, - const T* beta, - int n -) { +RAFT_KERNEL kernel_triangular_populate(T* M, const T* beta, int n) +{ // int row = blockIdx.x * blockDim.x + threadIdx.x; // if (row < n) { // // Upper diagonal @@ -1495,23 +1477,15 @@ RAFT_KERNEL kernel_triangular_populate( if (row < n) { // Upper diagonal: M[row + 1, row] in column-major - if (row < n - 1) { - M[(row + 1) * n + row] = beta[row]; - } + if (row < n - 1) { M[(row + 1) * n + row] = beta[row]; } // Lower diagonal: M[row - 1, row] in column-major - if (row > 0) { - M[(row - 1) * n + row] = beta[row - 1]; - } + if (row > 0) { M[(row - 1) * n + row] = beta[row - 1]; } } } -template -RAFT_KERNEL kernel_triangular_beta_k( - T* t, - const T* beta_k, - int k, - int n) +template +RAFT_KERNEL kernel_triangular_beta_k(T* t, const T* beta_k, int k, int n) { // int tid = threadIdx.x + blockIdx.x * blockDim.x; // if (tid < k) { @@ -1523,24 +1497,17 @@ RAFT_KERNEL kernel_triangular_beta_k( int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < k) { - // Update the k-th column: t[i, k] -> t[k * n + i] in column-major - t[tid * n + k] = beta_k[tid]; + // Update the k-th column: t[i, k] -> t[k * n + i] in column-major + t[tid * n + k] = beta_k[tid]; - // Update the k-th row: t[k, j] -> t[j * n + k] in column-major - t[k * n + tid] = beta_k[tid]; + // Update the k-th row: t[k, j] -> t[j * n + k] in column-major + t[k * n + tid] = beta_k[tid]; } } template -RAFT_KERNEL kernel_normalize( - const T* u, - const T* beta, - int j, - int n, - T* v, - T* V, - int size -) { +RAFT_KERNEL kernel_normalize(const T* u, const T* beta, int j, int n, T* v, T* V, int size) +{ // FIXME: custom cuda kernel vs raft primitives? // # Normalize // _kernel_normalize(u, beta, i, n, v, V) @@ -1549,7 +1516,7 @@ RAFT_KERNEL kernel_normalize( // 'T u, raw S beta, int32 j, int32 n', 'T v, raw T V', // 'v = u / beta[j]; V[i + (j+1) * n] = v;', 'cupy_eigsh_normalize') - // v = u / beta[j]; + // v = u / beta[j]; // V[i + (j+1) * n] = v; int i = blockIdx.x * blockDim.x + threadIdx.x; @@ -1559,34 +1526,24 @@ RAFT_KERNEL kernel_normalize( } else { v[i] = u[i] / beta[j]; } - V[i+ (j+1) * n] = v[i]; + V[i + (j + 1) * n] = v[i]; } } - -template -RAFT_KERNEL kernel_clamp_down( - T* value, - T threshold -) +template +RAFT_KERNEL kernel_clamp_down(T* value, T threshold) { *value = (fabs(*value) < threshold) ? 
0 : *value; } -template -RAFT_KERNEL kernel_clamp_down_vector( - T* vec, - T threshold, - int size -) +template +RAFT_KERNEL kernel_clamp_down_vector(T* vec, T threshold, int size) { int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < size) { - vec[idx] = (fabs(vec[idx]) < threshold) ? 0 : vec[idx]; - } + if (idx < size) { vec[idx] = (fabs(vec[idx]) < threshold) ? 0 : vec[idx]; } } -template +template void cupy_solve_ritz( raft::resources const& handle, raft::device_matrix_view alpha, @@ -1596,10 +1553,8 @@ void cupy_solve_ritz( int which, int ncv, raft::device_matrix_view eigenvectors, - raft::device_vector_view eigenvalues -) + raft::device_vector_view eigenvalues) { - // # Note: This is done on the CPU, because there is an issue in // # cupy.linalg.eigh with CUDA 9.2, which can return NaNs. It will has little // # impact on performance, since the matrix size processed here is not large. @@ -1640,39 +1595,41 @@ void cupy_solve_ritz( // FIXME: use public raft apis instead of using detail - // add some primitves to create triangular dense matrix? - auto stream = resource::get_cuda_stream(handle); + // add some primitives to create triangular dense matrix? + auto stream = resource::get_cuda_stream(handle); value_type_t zero = 0; - auto triangular_matrix = raft::make_device_matrix(handle, ncv, ncv); + auto triangular_matrix = + raft::make_device_matrix(handle, ncv, ncv); raft::matrix::fill(handle, triangular_matrix.view(), zero); - raft::matrix::initializeDiagonalMatrix(alpha.data_handle(), triangular_matrix.data_handle(), ncv, ncv, stream); + raft::matrix::initializeDiagonalMatrix( + alpha.data_handle(), triangular_matrix.data_handle(), ncv, ncv, stream); // print_device_vector("triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); - int blockSize = 256; int numBlocks = (ncv + blockSize - 1) / blockSize; - kernel_triangular_populate<<>>(triangular_matrix.data_handle(), beta.data_handle(), ncv); + kernel_triangular_populate + <<>>(triangular_matrix.data_handle(), beta.data_handle(), ncv); // if beta_k is not None: // beta_k = cupy.asnumpy(beta_k) // t[k, :k] = beta_k // t[:k, k] = beta_k - if (beta_k) { int threadsPerBlock = 256; - int blocksPerGrid = (k + threadsPerBlock - 1) / threadsPerBlock; - kernel_triangular_beta_k<<>>(triangular_matrix.data_handle(), beta_k.value().data_handle(), (int)k, ncv); + int blocksPerGrid = (k + threadsPerBlock - 1) / threadsPerBlock; + kernel_triangular_beta_k<<>>( + triangular_matrix.data_handle(), beta_k.value().data_handle(), (int)k, ncv); } - - // print_device_vector("ritz triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); - auto triangular_matrix_view = raft::make_device_matrix_view(triangular_matrix.data_handle(), ncv, ncv); + auto triangular_matrix_view = + raft::make_device_matrix_view( + triangular_matrix.data_handle(), ncv, ncv); // print_device_vector("triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); @@ -1681,43 +1638,41 @@ void cupy_solve_ritz( raft::linalg::eig_dc(handle, triangular_matrix_view, eigenvectors, eigenvalues); } - template -void cupy_aux( - raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, - raft::device_matrix_view V, - raft::device_matrix_view u, - raft::device_matrix_view alpha, - raft::device_matrix_view beta, - int start_idx, - int end_idx, - int ncv, - raft::device_matrix_view v, - raft::device_matrix_view uu, - raft::device_matrix_view vv -) +void cupy_aux(raft::resources const& handle, + spectral::matrix::sparse_matrix_t const* A, + 
raft::device_matrix_view V, + raft::device_matrix_view u, + raft::device_matrix_view alpha, + raft::device_matrix_view beta, + int start_idx, + int end_idx, + int ncv, + raft::device_matrix_view v, + raft::device_matrix_view uu, + raft::device_matrix_view vv) { - auto stream = resource::get_cuda_stream(handle); + auto stream = resource::get_cuda_stream(handle); int n = A->nrows_; // std::cout << std::fixed << std::setprecision(7); // Set precision to 10 decimal places - //int i = 0; + // int i = 0; // int b = 0; // int one = 1; // int zero = 0; // int mone = -1; - // auto V_const = raft::make_device_matrix_view(V.data_handle(), ncv, n); - + // auto V_const = raft::make_device_matrix_view(V.data_handle(), ncv, n); // v[...] = V[i_start] raft::copy(v.data_handle(), &(V(start_idx, 0)), n, stream); // auto mp = raft::make_device_vector(handle, 1); // raft::matrix::fill(handle, mp.view(), start_idx); // auto mp_const = raft::make_device_vector_view(mp.data_handle(), 1); - // auto v_view = raft::make_device_matrix_view(v.data_handle(), 1, n); + // auto v_view = raft::make_device_matrix_view(v.data_handle(), 1, n); // raft::matrix::gather(handle, V_const, mp_const, v_view); @@ -1730,20 +1685,17 @@ void cupy_aux( // print_device_vector("uu", v.data_handle(), n, std::cout); // print_device_vector("vv", v.data_handle(), n, std::cout); - - // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); - auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; raft::sparse::detail::cusparsecreatecsr(&cusparse_A, - A->nrows_, - A->ncols_, - A->nnz_, - const_cast(A->row_offsets_), - const_cast(A->col_indices_), - const_cast(A->values_)); + A->nrows_, + A->ncols_, + A->nnz_, + const_cast(A->row_offsets_), + const_cast(A->col_indices_), + const_cast(A->values_)); cusparseDnVecDescr_t cusparse_v; cusparseDnVecDescr_t cusparse_u; @@ -1755,15 +1707,33 @@ void cupy_aux( // print_device_vector("spmv u", u.data_handle(), n, std::cout); // } - value_type_t one = 1; + value_type_t one = 1; value_type_t zero = 0; size_t bufferSize; - raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, cusparse_A, cusparse_v, &zero, cusparse_u, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize, stream); + raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, + cusparse_A, + cusparse_v, + &zero, + cusparse_u, + CUSPARSE_SPMV_ALG_DEFAULT, + &bufferSize, + stream); auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); // LOOP for (int i = start_idx; i < end_idx; i++) { - raft::sparse::detail::cusparsespmv(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, cusparse_A, cusparse_v, &zero, cusparse_u, CUSPARSE_SPMV_ALG_DEFAULT, cusparse_spmv_buffer.data_handle(), stream); + raft::sparse::detail::cusparsespmv(cusparse_h, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, + cusparse_A, + cusparse_v, + &zero, + cusparse_u, + CUSPARSE_SPMV_ALG_DEFAULT, + cusparse_spmv_buffer.data_handle(), + stream); // if (start_idx == 0 && i == 0) { // print_device_vector("u spmv", u.data_handle(), n, std::cout); @@ -1783,7 +1753,7 @@ void cupy_aux( // we should only have real numbers // FIXME: loop index - auto alpha_i = raft::make_device_scalar_view(&alpha(0, i)); + auto alpha_i = raft::make_device_scalar_view(&alpha(0, i)); auto v_vector = raft::make_device_vector_view(v.data_handle(), n); auto u_vector = raft::make_device_vector_view(u.data_handle(), n); raft::linalg::dot(handle, v_vector, u_vector, alpha_i); @@ 
-1811,9 +1781,10 @@ void cupy_aux( // vv.data.ptr, 1, u.data.ptr, 1) // FIXME: beta(0, i-1) raft::matrix::fill(handle, vv, zero); - // raft::device_scalar_view beta_view = make_device_scalar_view(&beta(0, 0)); - //value_type_t scalar; - //raft::copy(&scalar, &beta(0, 0), 1, stream); + // raft::device_scalar_view beta_view = make_device_scalar_view(&beta(0, 0)); + // value_type_t scalar; + // raft::copy(&scalar, &beta(0, 0), 1, stream); // const value_type_t scalar_const = scalar; // auto b = raft::make_device_scalar(handle, scalar_const); @@ -1821,8 +1792,8 @@ void cupy_aux( auto cublas_h = resource::get_cublas_handle(handle); value_type_t alpha_i_host = 0; - value_type_t b = 0; - value_type_t mone = -1; + value_type_t b = 0; + value_type_t mone = -1; // FIXME: alpha(0, i) raft::copy(&b, &beta(0, (i - 1 + ncv) % ncv), 1, stream); @@ -1830,18 +1801,20 @@ void cupy_aux( // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); - raft::linalg::detail::cublasaxpy(cublas_h, n, &alpha_i_host, v.data_handle(), 1, vv.data_handle(), 1, stream); + raft::linalg::detail::cublasaxpy( + cublas_h, n, &alpha_i_host, v.data_handle(), 1, vv.data_handle(), 1, stream); // // FIXME: &V(i, 0) // std::cout << "got here axpy" << std::endl; - raft::linalg::detail::cublasaxpy(cublas_h, n, &b, &V((i - 1 + ncv) % ncv, 0), 1, vv.data_handle(), 1, stream); + raft::linalg::detail::cublasaxpy( + cublas_h, n, &b, &V((i - 1 + ncv) % ncv, 0), 1, vv.data_handle(), 1, stream); // std::cout << "got here axpy" << std::endl; - raft::linalg::detail::cublasaxpy(cublas_h, n, &mone, vv.data_handle(), 1, u.data_handle(), 1, stream); + raft::linalg::detail::cublasaxpy( + cublas_h, n, &mone, vv.data_handle(), 1, u.data_handle(), 1, stream); // if (start_idx == 7 && i == 7) { // print_device_vector("axpy u", u.data_handle(), n, std::cout); - // } - + // } // std::cout << "got here axpy" << std::endl; @@ -1850,7 +1823,6 @@ void cupy_aux( // print_device_vector("ortho u", u.data_handle(), n, std::cout); - // # Reorthogonalize: u -= V @ (V.conj().T @ u) // gemv(cublas_handle, _cublas.CUBLAS_OP_C, // n, i + 1, @@ -1878,37 +1850,36 @@ void cupy_aux( // } raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_T, - n, - i + 1, - &one, - V.data_handle(), - n, - u.data_handle(), - 1, - &zero, - uu.data_handle(), - 1, - stream); + CUBLAS_OP_T, + n, + i + 1, + &one, + V.data_handle(), + n, + u.data_handle(), + 1, + &zero, + uu.data_handle(), + 1, + stream); raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_N, - n, - i + 1, - &mone, - V.data_handle(), - n, - uu.data_handle(), - 1, - &one, - u.data_handle(), - 1, - stream); + CUBLAS_OP_N, + n, + i + 1, + &mone, + V.data_handle(), + n, + uu.data_handle(), + 1, + &one, + u.data_handle(), + 1, + stream); auto uu_i = raft::make_device_scalar_view(&uu(0, i)); raft::linalg::add(handle, make_const_mdspan(alpha_i), make_const_mdspan(uu_i), alpha_i); - // flush alpha kernel_clamp_down<<<1, 1>>>(alpha_i.data_handle(), static_cast(1e-9)); @@ -1916,7 +1887,6 @@ void cupy_aux( // print_device_vector("gemv alpha[i]", &alpha(0, i), 1, std::cout); // print_device_vector("gemv u", u.data_handle(), n, std::cout); - // FIXME: pointer mode for alpha beta? 
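// beta[i] lives in device memory, so the nrm2 call below temporarily switches
// cuBLAS into CUBLAS_POINTER_MODE_DEVICE: the norm is written straight to
// &beta(0, i) without a host round trip, after which host pointer mode is
// restored for the neighboring calls that pass host-side scalars.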
// # Call nrm2 // _cublas.setPointerMode( @@ -1932,12 +1902,11 @@ void cupy_aux( // print_device_vector("nrm2 beta[i]", &beta(0, i), 1, std::cout); raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream); - int blockSize = 256; int numBlocks = (n + blockSize - 1) / blockSize; - kernel_clamp_down_vector<<>>(u.data_handle(), static_cast(1e-7), n); - + kernel_clamp_down_vector<<>>( + u.data_handle(), static_cast(1e-7), n); kernel_clamp_down<<<1, 1>>>(&beta(0, i), static_cast(1e-6)); @@ -1945,10 +1914,7 @@ void cupy_aux( // # Break here as the normalization below touches V[i+1] // if i >= i_end - 1: // break - if (i >= end_idx - 1) { - break; - } - + if (i >= end_idx - 1) { break; } // FIXME: custom cuda kernel vs raft primitives? // # Normalize @@ -1958,16 +1924,17 @@ void cupy_aux( // 'T u, raw S beta, int32 j, int32 n', 'T v, raw T V', // 'v = u / beta[j]; V[i + (j+1) * n] = v;', 'cupy_eigsh_normalize') - // v = u / beta[j]; + // v = u / beta[j]; // V[i + (j+1) * n] = v; int threadsPerBlock = 256; - int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; + int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; - kernel_normalize<<>>(u.data_handle(), beta.data_handle(), i, n, v.data_handle(), V.data_handle(), n); + kernel_normalize<<>>( + u.data_handle(), beta.data_handle(), i, n, v.data_handle(), V.data_handle(), n); - // print_device_vector("kernal normalize v", v.data_handle(), n, std::cout); - // print_device_vector("kernal normalize V", V.data_handle(), n*ncv, std::cout); + // print_device_vector("kernel normalize v", v.data_handle(), n, std::cout); + // print_device_vector("kernel normalize V", V.data_handle(), n*ncv, std::cout); // raft::linalg::unary_op(handle,u, v, // [device_scalar = beta(0, i)] __device__(auto y) { @@ -1978,63 +1945,70 @@ void cupy_aux( } } - template -int cupy_smallest( - raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - value_type_t* eigVals_dev, - value_type_t* eigVecs_dev, - value_type_t* v0, - uint64_t seed -) +int cupy_smallest(raft::resources const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + value_type_t* eigVals_dev, + value_type_t* eigVecs_dev, + value_type_t* v0, + uint64_t seed) { - // std::cout << "hello cupy smallest " << A->nrows_ << " " << A->ncols_ << " " << A->nnz_ << std::endl; + // std::cout << "hello cupy smallest " << A->nrows_ << " " << A->ncols_ << " " << A->nnz_ << + // std::endl; - int n = A->nrows_; + int n = A->nrows_; int ncv = restartIter; // raft::print_device_vector("hello cupy v0 init", v0, n, std::cout); - auto stream = resource::get_cuda_stream(handle); + auto stream = resource::get_cuda_stream(handle); std::cout << std::fixed << std::setprecision(7); // Set precision to 10 decimal places - // print_device_vector("v0_cpp", v0, n, std::cout); // u = v0 // V[0] = v0 / cublas.nrm2(v0) - raft::device_matrix V = raft::make_device_matrix(handle, ncv, n); - raft::device_matrix_view V_0_view = raft::make_device_matrix_view(V.data_handle(), 1, n); // First Row V[0] - raft::device_matrix_view v0_view = raft::make_device_matrix_view(v0, 1, n); + raft::device_matrix V = + raft::make_device_matrix(handle, ncv, n); + raft::device_matrix_view V_0_view = + raft::make_device_matrix_view(V.data_handle(), 1, n); // First Row V[0] + raft::device_matrix_view v0_view = + 
raft::make_device_matrix_view(v0, 1, n); // raft::linalg::row_normalize(handle, v0_view, V_0_view, raft::linalg::L2Norm); - raft::device_matrix u = raft::make_device_matrix(handle, 1, n); + raft::device_matrix u = + raft::make_device_matrix(handle, 1, n); raft::copy(u.data_handle(), v0, n, stream); - auto cublas_h = resource::get_cublas_handle(handle); + auto cublas_h = resource::get_cublas_handle(handle); value_type_t v0nrm = 0; raft::linalg::detail::cublasnrm2(cublas_h, n, v0_view.data_handle(), 1, &v0nrm, stream); // std::cout << "v0nrm " << v0nrm << std::endl; raft::device_scalar v0nrm_scalar = raft::make_device_scalar(handle, v0nrm); - raft::device_vector_view v0_vector_const = raft::make_device_vector_view(v0, n); - // raft::device_vector_view v0_vector = raft::make_device_vector_view(v0, n); + raft::device_vector_view v0_vector_const = + raft::make_device_vector_view(v0, n); + // raft::device_vector_view v0_vector = + // raft::make_device_vector_view(v0, n); - raft::linalg::unary_op(handle, v0_vector_const, V_0_view, [device_scalar = v0nrm_scalar.data_handle()] __device__(auto y) { - return y / *device_scalar; - }); + raft::linalg::unary_op( + handle, + v0_vector_const, + V_0_view, + [device_scalar = v0nrm_scalar.data_handle()] __device__(auto y) { return y / *device_scalar; }); // print_device_vector("V[0]", V_0_view.data_handle(), n, std::cout); // print_device_vector("V[0]", V.data_handle(), n, std::cout); - raft::device_matrix alpha = raft::make_device_matrix(handle, 1, ncv); - raft::device_matrix beta = raft::make_device_matrix(handle, 1, ncv); + raft::device_matrix alpha = + raft::make_device_matrix(handle, 1, ncv); + raft::device_matrix beta = + raft::make_device_matrix(handle, 1, ncv); value_type_t zero = 0; raft::matrix::fill(handle, alpha.view(), zero); raft::matrix::fill(handle, beta.view(), zero); @@ -2058,14 +2032,26 @@ int cupy_smallest( // zero = numpy.array(0.0, dtype=A.dtype) // mone = numpy.array(-1.0, dtype=A.dtype) - raft::device_matrix v = raft::make_device_matrix(handle, 1, n); - raft::device_matrix aux_uu = raft::make_device_matrix(handle, 1, ncv); - raft::device_matrix vv = raft::make_device_matrix(handle, 1, n); + raft::device_matrix v = + raft::make_device_matrix(handle, 1, n); + raft::device_matrix aux_uu = + raft::make_device_matrix(handle, 1, ncv); + raft::device_matrix vv = + raft::make_device_matrix(handle, 1, n); // cupy_aux(A, V.view(), u_view, alpha.view(), beta.view()); - cupy_aux(handle, A, V.view(), u.view(), alpha.view(), beta.view(), 0, ncv, ncv, v.view(), aux_uu.view(), vv.view()); - - + cupy_aux(handle, + A, + V.view(), + u.view(), + alpha.view(), + beta.view(), + 0, + ncv, + ncv, + v.view(), + aux_uu.view(), + vv.view()); // # Lanczos iteration // lanczos(a, V, u, alpha, beta, 0, ncv) @@ -2079,35 +2065,48 @@ int cupy_smallest( // res = cublas.nrm2(beta_k) // uu = cupy.empty((k,), dtype=a.dtype) - auto eigenvectors = raft::make_device_matrix(handle, ncv, ncv); + auto eigenvectors = + raft::make_device_matrix(handle, ncv, ncv); auto eigenvalues = raft::make_device_vector(handle, ncv); - cupy_solve_ritz(handle, alpha.view(), beta.view(), std::nullopt, nEigVecs, 0, ncv, eigenvectors.view(), eigenvalues.view()); + cupy_solve_ritz(handle, + alpha.view(), + beta.view(), + std::nullopt, + nEigVecs, + 0, + ncv, + eigenvectors.view(), + eigenvalues.view()); // print_device_vector("V", V.data_handle(), n*ncv, std::cout); // print_device_vector("u", u.data_handle(), n, std::cout); // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); // 
print_device_vector("beta", beta.data_handle(), ncv, std::cout); // print_device_vector("v", v.data_handle(), n, std::cout); - auto eigenvectors_k = raft::make_device_matrix_view(eigenvectors.data_handle(), ncv, nEigVecs); - raft::device_vector_view eigenvalues_k = raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); + auto eigenvectors_k = raft::make_device_matrix_view( + eigenvectors.data_handle(), ncv, nEigVecs); + raft::device_vector_view eigenvalues_k = + raft::make_device_vector_view( + eigenvalues.data_handle(), nEigVecs); // print_device_vector("eigenvectors", eigenvectors_k.data_handle(), nEigVecs*ncv, std::cout); // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); - // x = V.T @ s // ncv*n x ncv*nEigVecs - auto ritz_eigenvectors = raft::make_device_matrix_view(eigVecs_dev, n, nEigVecs); - + auto ritz_eigenvectors = raft::make_device_matrix_view( + eigVecs_dev, n, nEigVecs); - auto V_T = raft::make_device_matrix_view(V.data_handle(), n, ncv); - raft::linalg::gemm(handle, V_T, eigenvectors_k, ritz_eigenvectors); - - // print_device_vector("ritz_eigenvectors", ritz_eigenvectors.data_handle(), n*nEigVecs, std::cout); + auto V_T = + raft::make_device_matrix_view(V.data_handle(), n, ncv); + raft::linalg::gemm( + handle, V_T, eigenvectors_k, ritz_eigenvectors); + // print_device_vector("ritz_eigenvectors", ritz_eigenvectors.data_handle(), n*nEigVecs, + // std::cout); // # Compute residual // beta_k = beta[-1] * s[-1, :] @@ -2115,37 +2114,37 @@ int cupy_smallest( // FIXME: raft::linalg::map_offset() // Define grid and block sizes - int blockSize = 256; // Number of threads per block + int blockSize = 256; // Number of threads per block int numBlocks = (nEigVecs + blockSize - 1) / blockSize; auto s = raft::make_device_vector(handle, nEigVecs); - kernel_get_last_row<<>>(eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); + kernel_get_last_row<<>>( + eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); // print_device_vector("s_new[-1, :]", s.data_handle(), nEigVecs, std::cout); - - auto beta_k = raft::make_device_vector(handle, nEigVecs); raft::matrix::fill(handle, beta_k.view(), zero); - // auto s = raft::make_device_vector_view(&eigenvectors_k(ncv - 1, 0), nEigVecs); - auto beta_scalar = raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); + // auto s = raft::make_device_vector_view(&eigenvectors_k(ncv - 1, 0), + // nEigVecs); + auto beta_scalar = + raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); - //auto cublas_h = resource::get_cublas_handle(handle); + // auto cublas_h = resource::get_cublas_handle(handle); value_type_t res = 0; raft::linalg::detail::cublasnrm2(cublas_h, nEigVecs, beta_k.data_handle(), 1, &res, stream); // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); - //print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); - // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); - // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); + // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); + // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); + // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, 
std::cout); std::cout << "res " << res << std::endl; - // uu = cupy.empty((k,), dtype=a.dtype) // while res > tol and iter < maxiter: @@ -2179,31 +2178,31 @@ int cupy_smallest( // print(iter, w, res) - - auto uu = raft::make_device_matrix(handle, 0, nEigVecs); + auto uu = raft::make_device_matrix(handle, 0, nEigVecs); int iter = ncv; while (res > tol && iter < maxIter) { // setup for thick-restart // beta[:k] = 0 - auto beta_view = raft::make_device_matrix_view(beta.data_handle(), 1, nEigVecs); + auto beta_view = raft::make_device_matrix_view( + beta.data_handle(), 1, nEigVecs); raft::matrix::fill(handle, beta_view, zero); // alpha[:k] = w raft::copy(alpha.data_handle(), eigenvalues_k.data_handle(), nEigVecs, stream); // V[:k] = x.T - // auto x_T = raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); - // auto V_k_view = raft::make_device_matrix_view(V.data_handle(), nEigVecs, n); - + // auto x_T = raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); auto V_k_view = + // raft::make_device_matrix_view(V.data_handle(), nEigVecs, n); // auto x_T = raft::make_device_matrix(handle, nEigVecs, n); - auto x_T = raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); + auto x_T = + raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); // raft::linalg::transpose(handle, ritz_eigenvectors, x_T.view()); raft::copy(V.data_handle(), x_T.data_handle(), nEigVecs * n, stream); // print_device_vector("V[:k]", V.data_handle(), nEigVecs * n, std::cout); - // FIXME: manually multiply eigenvectors by -1 to see if that fixes anything // 0, 1, 2, 5 // auto V_zero = raft::make_device_vector_view(V.data_handle(), n); @@ -2218,8 +2217,7 @@ int cupy_smallest( // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_two), V_two, minusone.view()); // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_five), V_five, minusone.view()); - - value_type_t one = 1; + value_type_t one = 1; value_type_t mone = -1; // # u -= u.T @ V[:k].conj().T @ V[:k] // cublas.gemv(_cublas.CUBLAS_OP_C, 1, V[:k].T, u, 0, uu) @@ -2229,36 +2227,36 @@ int cupy_smallest( // FIXME: uu is too small? 
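// NOTE: per the CuPy reference above, the cublasgemv pair below implements the
// thick-restart re-orthogonalization
//   uu = V[:k].conj() @ u;  u -= V[:k].T @ uu
// i.e. u is projected away from the k locked Ritz vectors before the Lanczos
// recursion resumes (V is stored row-major with shape (ncv, n), so V[:k] is
// its first k rows). Note that uu is allocated earlier in this function as
// make_device_matrix(handle, 0, nEigVecs), i.e. with zero rows and therefore
// zero elements, while this step needs nEigVecs scalars of scratch; that is
// presumably what the "uu is too small?" FIXME refers to.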
raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_T, - nEigVecs, - n, - &one, - V.data_handle(), - nEigVecs, - u.data_handle(), - 1, - &zero, - uu.data_handle(), - 1, - stream); + CUBLAS_OP_T, + nEigVecs, + n, + &one, + V.data_handle(), + nEigVecs, + u.data_handle(), + 1, + &zero, + uu.data_handle(), + 1, + stream); raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_N, - nEigVecs, - n, - &mone, - V.data_handle(), - nEigVecs, - uu.data_handle(), - 1, - &one, - u.data_handle(), - 1, - stream); - + CUBLAS_OP_N, + nEigVecs, + n, + &mone, + V.data_handle(), + nEigVecs, + uu.data_handle(), + 1, + &one, + u.data_handle(), + 1, + stream); // V[k] = u / cublas.nrm2(u) - raft::device_matrix_view V_0_view = raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); // Row V[k] + raft::device_matrix_view V_0_view = + raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); // Row V[k] // auto cublas_h = resource::get_cublas_handle(handle); value_type_t unrm = 0; raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &unrm, stream); @@ -2266,17 +2264,22 @@ int cupy_smallest( raft::device_scalar unrm_scalar = raft::make_device_scalar(handle, unrm); - raft::device_vector_view u_vector_const = raft::make_device_vector_view(u.data_handle(), n); - // raft::device_vector_view u_vector = raft::make_device_vector_view(u.data_handle(), n); - - raft::linalg::unary_op(handle, u_vector_const, V_0_view, [device_scalar = unrm_scalar.data_handle()] __device__(auto y) { - return y / *device_scalar; - }); + raft::device_vector_view u_vector_const = + raft::make_device_vector_view(u.data_handle(), n); + // raft::device_vector_view u_vector = + // raft::make_device_vector_view(u.data_handle(), n); + raft::linalg::unary_op(handle, + u_vector_const, + V_0_view, + [device_scalar = unrm_scalar.data_handle()] __device__(auto y) { + return y / *device_scalar; + }); - // raft::device_matrix_view V_0_view = raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); // Row V[k] - // raft::linalg::row_normalize(handle, raft::make_const_mdspan(u.view()), V_0_view, raft::linalg::L2Norm); - // print_device_vector("V[k]", V_0_view.data_handle(), n, std::cout); + // raft::device_matrix_view V_0_view = + // raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); // Row V[k] + // raft::linalg::row_normalize(handle, raft::make_const_mdspan(u.view()), V_0_view, + // raft::linalg::L2Norm); print_device_vector("V[k]", V_0_view.data_handle(), n, std::cout); // u[...] 
= a @ V[k] // cublas.dotc(V[k], u, out=alpha[k]) @@ -2288,12 +2291,12 @@ int cupy_smallest( auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; raft::sparse::detail::cusparsecreatecsr(&cusparse_A, - A->nrows_, - A->ncols_, - A->nnz_, - const_cast(A->row_offsets_), - const_cast(A->col_indices_), - const_cast(A->values_)); + A->nrows_, + A->ncols_, + A->nnz_, + const_cast(A->row_offsets_), + const_cast(A->col_indices_), + const_cast(A->values_)); cusparseDnVecDescr_t cusparse_v; cusparseDnVecDescr_t cusparse_u; @@ -2303,10 +2306,28 @@ int cupy_smallest( // value_type_t one = 1; value_type_t zero = 0; size_t bufferSize; - raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, cusparse_A, cusparse_v, &zero, cusparse_u, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize, stream); + raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, + cusparse_A, + cusparse_v, + &zero, + cusparse_u, + CUSPARSE_SPMV_ALG_DEFAULT, + &bufferSize, + stream); auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); - raft::sparse::detail::cusparsespmv(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, cusparse_A, cusparse_v, &zero, cusparse_u, CUSPARSE_SPMV_ALG_DEFAULT, cusparse_spmv_buffer.data_handle(), stream); + raft::sparse::detail::cusparsespmv(cusparse_h, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, + cusparse_A, + cusparse_v, + &zero, + cusparse_u, + CUSPARSE_SPMV_ALG_DEFAULT, + cusparse_spmv_buffer.data_handle(), + stream); // print_device_vector("u spmv", u.data_handle(), n, std::cout); @@ -2315,21 +2336,23 @@ int cupy_smallest( // auto u_vector = raft::make_device_vector_view(u.data_handle(), n); // raft::linalg::dot(handle, v_vector, u_vector, alpha_i); - auto alpha_k = raft::make_device_scalar_view(&((alpha.view())(0, nEigVecs))); - auto V_0_view_vector = raft::make_device_vector_view(V_0_view.data_handle(), n); + auto V_0_view_vector = + raft::make_device_vector_view(V_0_view.data_handle(), n); auto u_view_vector = raft::make_device_vector_view(u.data_handle(), n); raft::linalg::dot(handle, V_0_view_vector, u_view_vector, alpha_k); // raft::linalg::multiply_scalar(handle, V_0_view, u.view()); - // raft::linalg::unary_op(handle, V_0_view, u.view(), [device_scalar = alpha_k.data_handle()] __device__(auto y) { + // raft::linalg::unary_op(handle, V_0_view, u.view(), [device_scalar = alpha_k.data_handle()] + // __device__(auto y) { // return y * (*device_scalar); // }); int threadsPerBlock = 256; - int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; + int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; // kernel_subtract_and_scale<<>>(u.data_handle(), a, a, n); - kernel_subtract_and_scale<<>>(u.data_handle(), V_0_view.data_handle(), alpha_k.data_handle(), n); + kernel_subtract_and_scale<<>>( + u.data_handle(), V_0_view.data_handle(), alpha_k.data_handle(), n); // print_device_vector("u subtract and scale", u.data_handle(), n, std::cout); @@ -2341,8 +2364,10 @@ int cupy_smallest( // print_device_vector("temp", temp.data_handle(), n, std::cout); - auto V_k = raft::make_device_matrix_view(V.data_handle(), nEigVecs, n); - auto V_k_T = raft::make_device_matrix(handle, n, nEigVecs); + auto V_k = raft::make_device_matrix_view( + V.data_handle(), nEigVecs, n); + auto V_k_T = + raft::make_device_matrix(handle, n, nEigVecs); // print_device_vector("V_k", V_k.data_handle(), nEigVecs*n, std::cout); @@ -2350,24 +2375,24 @@ int cupy_smallest( // 
print_device_vector("V_k_T", V_k_T.data_handle(), nEigVecs*n, std::cout); - // (n, nEigVecs) x (nEigVecs) - // auto beta_k_vector = raft::make_device_vector_view(beta_k.data_handle(), nEigVecs); - - // raft::linalg::gemv(handle, make_const_mdspan(V_k_T.view()), beta_k_vector, temp.view()); + // auto beta_k_vector = raft::make_device_vector_view(beta_k.data_handle(), nEigVecs); + // raft::linalg::gemv(handle, + // make_const_mdspan(V_k_T.view()), beta_k_vector, temp.view()); // FIXME: build small test case for cublasgemv value_type_t three = 3; - value_type_t two = 2; + value_type_t two = 2; - std::vector M = {1, 2, 3, 4, 5, 6}; + std::vector M = {1, 2, 3, 4, 5, 6}; std::vector vec = {1, 1}; - auto M_dev = raft::make_device_matrix(handle, 2, 3); + auto M_dev = raft::make_device_matrix(handle, 2, 3); auto vec_dev = raft::make_device_vector(handle, 2); - auto out = raft::make_device_vector(handle, 3); + auto out = raft::make_device_vector(handle, 3); raft::copy(M_dev.data_handle(), M.data(), 6, stream); raft::copy(vec_dev.data_handle(), vec.data(), 2, stream); // raft::linalg::detail::cublasgemv(cublas_h, @@ -2385,39 +2410,38 @@ int cupy_smallest( // stream); raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_N, - three, - two, - &one, - M_dev.data_handle(), - three, - vec_dev.data_handle(), - 1, - &zero, - out.data_handle(), - 1, - stream); + CUBLAS_OP_N, + three, + two, + &one, + M_dev.data_handle(), + three, + vec_dev.data_handle(), + 1, + &zero, + out.data_handle(), + 1, + stream); // print_device_vector("out", out.data_handle(), 3, std::cout); - - raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_N, - n, - nEigVecs, - &one, - V_k.data_handle(), - n, - beta_k.data_handle(), - 1, - &zero, - temp.data_handle(), - 1, - stream); - - auto one_scalar = raft::make_device_scalar(handle,1); - kernel_subtract_and_scale<<>>(u.data_handle(), temp.data_handle(), one_scalar.data_handle(), n); + CUBLAS_OP_N, + n, + nEigVecs, + &one, + V_k.data_handle(), + n, + beta_k.data_handle(), + 1, + &zero, + temp.data_handle(), + 1, + stream); + + auto one_scalar = raft::make_device_scalar(handle, 1); + kernel_subtract_and_scale<<>>( + u.data_handle(), temp.data_handle(), one_scalar.data_handle(), n); // print_device_vector("V", V.data_handle(), nEigVecs*n, std::cout); // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); @@ -2425,16 +2449,19 @@ int cupy_smallest( // print_device_vector("temp", temp.data_handle(), n, std::cout); // print_device_vector("u subtract and scale", u.data_handle(), n, std::cout); - raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_DEVICE, stream); - raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &((beta.view())(0, nEigVecs)), stream); + raft::linalg::detail::cublasnrm2( + cublas_h, n, u.data_handle(), 1, &((beta.view())(0, nEigVecs)), stream); // print_device_vector("nrm2 u", &((beta.view())(0, nEigVecs)), 1, std::cout); raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream); auto V_kplus1 = raft::make_device_vector_view(&(V.view()(nEigVecs + 1, 0)), n); auto u_vector = raft::make_device_vector_view(u.data_handle(), n); - raft::linalg::unary_op(handle, u_vector, V_kplus1, [device_scalar = &((beta.view())(0, nEigVecs))] __device__(auto y) { + raft::linalg::unary_op(handle, + u_vector, + V_kplus1, + [device_scalar = &((beta.view())(0, nEigVecs))] __device__(auto y) { return y / *device_scalar; }); @@ -2459,31 +2486,54 @@ int cupy_smallest( // print_device_vector("beta", 
beta.data_handle(), ncv, std::cout); // print_device_vector("v", v.data_handle(), n, std::cout); - cupy_aux(handle, A, V.view(), u.view(), alpha.view(), beta.view(), nEigVecs + 1, ncv, ncv, v.view(), aux_uu.view(), vv.view()); + cupy_aux(handle, + A, + V.view(), + u.view(), + alpha.view(), + beta.view(), + nEigVecs + 1, + ncv, + ncv, + v.view(), + aux_uu.view(), + vv.view()); // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); // print_device_vector("beta", beta.data_handle(), ncv, std::cout); // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); iter += ncv - nEigVecs; - cupy_solve_ritz(handle, alpha.view(), beta.view(), beta_k.view(), nEigVecs, 0, ncv, eigenvectors.view(), eigenvalues.view()); - auto eigenvectors_k = raft::make_device_matrix_view(eigenvectors.data_handle(), ncv, nEigVecs); - // raft::device_vector_view eigenvalues_k = raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); + cupy_solve_ritz(handle, + alpha.view(), + beta.view(), + beta_k.view(), + nEigVecs, + 0, + ncv, + eigenvectors.view(), + eigenvalues.view()); + auto eigenvectors_k = raft::make_device_matrix_view( + eigenvectors.data_handle(), ncv, nEigVecs); + // raft::device_vector_view eigenvalues_k = + // raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); // print_device_vector("eigenvectors", eigenvectors_k.data_handle(), nEigVecs*ncv, std::cout); // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); - // x = V.T @ s // ncv*n x ncv*nEigVecs - auto ritz_eigenvectors = raft::make_device_matrix_view(eigVecs_dev, n, nEigVecs); + auto ritz_eigenvectors = raft::make_device_matrix_view( + eigVecs_dev, n, nEigVecs); + auto V_T = raft::make_device_matrix_view( + V.data_handle(), n, ncv); + raft::linalg::gemm( + handle, V_T, eigenvectors_k, ritz_eigenvectors); - auto V_T = raft::make_device_matrix_view(V.data_handle(), n, ncv); - raft::linalg::gemm(handle, V_T, eigenvectors_k, ritz_eigenvectors); - - // print_device_vector("ritz_eigenvectors", ritz_eigenvectors.data_handle(), n*nEigVecs, std::cout); - + // print_device_vector("ritz_eigenvectors", ritz_eigenvectors.data_handle(), n*nEigVecs, + // std::cout); // # Compute residual // beta_k = beta[-1] * s[-1, :] @@ -2491,21 +2541,22 @@ int cupy_smallest( // FIXME: raft::linalg::map_offset() // Define grid and block sizes - int blockSize = 256; // Number of threads per block + int blockSize = 256; // Number of threads per block int numBlocks = (nEigVecs + blockSize - 1) / blockSize; auto s = raft::make_device_vector(handle, nEigVecs); - kernel_get_last_row<<>>(eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); + kernel_get_last_row<<>>( + eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); // print_device_vector("eigenvectors", eigenvectors.data_handle(), ncv*ncv, std::cout); // print_device_vector("s_new[-1, :]", s.data_handle(), nEigVecs, std::cout); - - - //auto beta_k = raft::make_device_vector(handle, nEigVecs); + // auto beta_k = raft::make_device_vector(handle, nEigVecs); raft::matrix::fill(handle, beta_k.view(), zero); - // auto s = raft::make_device_vector_view(&eigenvectors_k(ncv - 1, 0), nEigVecs); - auto beta_scalar = raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); + // auto s = raft::make_device_vector_view(&eigenvectors_k(ncv - 1, 0), + // nEigVecs); + auto beta_scalar = + raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); // print_device_vector("beta[-1]", beta_scalar.data_handle(), 1, std::cout); 
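// NOTE: the convergence test below is the classic Lanczos residual estimate:
// for a tridiagonal eigenpair (w_i, s_i) the Ritz-pair residual satisfies
// ||A @ x_i - w_i * x_i|| = |beta[ncv-1] * s[ncv-1, i]|, so beta_k collects
// those products and res is their 2-norm over all k pairs. Below is a hedged
// sketch, equivalent to the kernel_get_last_row launch above, using the
// raft::linalg::map_offset primitive named in the earlier FIXME (needs
// <raft/linalg/map.cuh>; assumes the flat offset is passed as uint32_t).
// eigenvectors_k is column-major (ncv, nEigVecs), so its last row sits at
// offsets (ncv - 1) + i * ncv:
raft::linalg::map_offset(
  handle, s.view(), [ev = eigenvectors_k.data_handle(), ncv] __device__(uint32_t i) {
    return ev[(ncv - 1) + i * ncv];
  });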
raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); @@ -2523,17 +2574,16 @@ int cupy_smallest( // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); std::cout << "res " << res << " " << iter << std::endl; // break; - } - // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); + // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); raft::copy(eigVals_dev, eigenvalues_k.data_handle(), nEigVecs, stream); - raft::copy(eigVecs_dev, ritz_eigenvectors.data_handle(), n*nEigVecs, stream); + raft::copy(eigVecs_dev, ritz_eigenvectors.data_handle(), n * nEigVecs, stream); return 0; } -template +template struct lanczos_solver_config { int n_components; int max_iterations; @@ -2542,19 +2592,25 @@ struct lanczos_solver_config { uint64_t seed; }; - -template +template auto lanczos_compute_smallest_eigenvectors( raft::resources const& handle, raft::spectral::matrix::sparse_matrix_t const& A, lanczos_solver_config const& config, raft::device_vector_view v0, raft::device_vector_view eigenvalues, - raft::device_matrix_view eigenvectors -) -> int + raft::device_matrix_view eigenvectors) -> int { - return cupy_smallest(handle, &A, config.n_components, config.max_iterations, config.ncv, config.tolerance, eigenvalues.data_handle(), eigenvectors.data_handle(), v0.data_handle(), config.seed); + return cupy_smallest(handle, + &A, + config.n_components, + config.max_iterations, + config.ncv, + config.tolerance, + eigenvalues.data_handle(), + eigenvectors.data_handle(), + v0.data_handle(), + config.seed); } - } // namespace raft::sparse::solver::detail diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index 2a72a0ae72..e20a1a9776 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
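A minimal usage sketch of the solver entry point added above, mirroring the
gtest added later in this series. This is hedged, not part of the patch: the
device pointers rows, cols, vals and the sizes n, nnz are assumed to describe a
symmetric CSR matrix already resident on the GPU, the config values are
arbitrary, and the statements belong inside some host function:

#include <raft/core/device_mdarray.hpp>
#include <raft/random/rng.cuh>
#include <raft/sparse/solver/lanczos.cuh>

raft::resources handle;
raft::random::RngState rng(42);
raft::spectral::matrix::sparse_matrix_t<int, float> const csr_m{
  handle, rows, cols, vals, n, nnz};
raft::sparse::solver::lanczos_solver_config<float> config{
  /*n_components=*/2, /*max_iterations=*/10000, /*ncv=*/34, /*tolerance=*/1e-9f, /*seed=*/42};
auto v0 = raft::make_device_vector<float, uint32_t>(handle, n);
raft::random::uniform(handle, rng, v0.view(), 0.0f, 1.0f);  // random starting vector
auto eigenvalues = raft::make_device_vector<float, uint32_t>(handle, config.n_components);
auto eigenvectors =
  raft::make_device_matrix<float, uint32_t, raft::col_major>(handle, n, config.n_components);
raft::sparse::solver::lanczos_compute_smallest_eigenvectors<int, float>(
  handle, csr_m, config, v0.view(), eigenvalues.view(), eigenvectors.view());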
@@ -21,7 +21,6 @@ #include #include - namespace raft::sparse::solver { // ========================================================= @@ -30,22 +29,19 @@ namespace raft::sparse::solver { using detail::lanczos_solver_config; - -template +template auto lanczos_compute_smallest_eigenvectors( raft::resources const& handle, raft::spectral::matrix::sparse_matrix_t const& A, lanczos_solver_config const& config, raft::device_vector_view v0, raft::device_vector_view eigenvalues, - raft::device_matrix_view eigenvectors -) -> int + raft::device_matrix_view eigenvectors) -> int { - return detail::lanczos_compute_smallest_eigenvectors(handle, A, config, v0, eigenvalues, eigenvectors); + return detail::lanczos_compute_smallest_eigenvectors( + handle, A, config, v0, eigenvalues, eigenvectors); } - - /** * @brief Compute smallest eigenvectors of symmetric matrix * Computes eigenvalues and eigenvectors that are least From fa222c1510927ab6dfa356e5c783a80a3426766f Mon Sep 17 00:00:00 2001 From: aamijar Date: Wed, 21 Aug 2024 17:37:17 +0000 Subject: [PATCH 04/23] eigsh pylibraft api --- cpp/CMakeLists.txt | 4 + .../raft/sparse/solver/detail/lanczos.cuh | 6 +- cpp/include/raft_runtime/solver/lanczos.hpp | 54 ++++ .../raft_runtime/solver/lanczos_solver.cuh | 93 +++++++ .../solver/lanczos_solver_int64_double.cu | 23 ++ .../solver/lanczos_solver_int64_float.cu | 23 ++ .../solver/lanczos_solver_int_double.cu | 23 ++ .../solver/lanczos_solver_int_float.cu | 23 ++ python/pylibraft/CMakeLists.txt | 1 + .../pylibraft/pylibraft/solver/CMakeLists.txt | 27 ++ .../pylibraft/pylibraft/solver/__init__.pxd | 0 python/pylibraft/pylibraft/solver/__init__.py | 18 ++ .../pylibraft/solver/cpp/__init__.pxd | 0 .../pylibraft/solver/cpp/__init__.py | 0 python/pylibraft/pylibraft/solver/lanczos.pyx | 234 ++++++++++++++++++ 15 files changed, 526 insertions(+), 3 deletions(-) create mode 100644 cpp/include/raft_runtime/solver/lanczos.hpp create mode 100644 cpp/src/raft_runtime/solver/lanczos_solver.cuh create mode 100644 cpp/src/raft_runtime/solver/lanczos_solver_int64_double.cu create mode 100644 cpp/src/raft_runtime/solver/lanczos_solver_int64_float.cu create mode 100644 cpp/src/raft_runtime/solver/lanczos_solver_int_double.cu create mode 100644 cpp/src/raft_runtime/solver/lanczos_solver_int_float.cu create mode 100644 python/pylibraft/pylibraft/solver/CMakeLists.txt create mode 100644 python/pylibraft/pylibraft/solver/__init__.pxd create mode 100644 python/pylibraft/pylibraft/solver/__init__.py create mode 100644 python/pylibraft/pylibraft/solver/cpp/__init__.pxd create mode 100644 python/pylibraft/pylibraft/solver/cpp/__init__.py create mode 100644 python/pylibraft/pylibraft/solver/lanczos.pyx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d7eeb60b27..ce4fa9ee1b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -555,6 +555,10 @@ if(RAFT_COMPILE_LIBRARY) src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu src/raft_runtime/random/rmat_rectangular_generator_int_double.cu src/raft_runtime/random/rmat_rectangular_generator_int_float.cu + src/raft_runtime/solver/lanczos_solver_int64_double.cu + src/raft_runtime/solver/lanczos_solver_int64_float.cu + src/raft_runtime/solver/lanczos_solver_int_double.cu + src/raft_runtime/solver/lanczos_solver_int_float.cu src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu diff --git 
a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index c96d6f1bfd..73cd84686f 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -1948,9 +1948,9 @@ void cupy_aux(raft::resources const& handle, template int cupy_smallest(raft::resources const& handle, spectral::matrix::sparse_matrix_t const* A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, + int nEigVecs, + int maxIter, + int restartIter, value_type_t tol, value_type_t* eigVals_dev, value_type_t* eigVecs_dev, diff --git a/cpp/include/raft_runtime/solver/lanczos.hpp b/cpp/include/raft_runtime/solver/lanczos.hpp new file mode 100644 index 0000000000..21ba0d1627 --- /dev/null +++ b/cpp/include/raft_runtime/solver/lanczos.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2024-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +namespace raft::runtime::solver { + +/** + * @defgroup rmat_runtime RMAT Runtime API + * @{ + */ + +#define FUNC_DECL(IndexType, ValueType) \ + void lanczos_solver(const raft::resources& handle, \ + IndexType* rows, \ + IndexType* cols, \ + ValueType* vals, \ + int nnz, \ + int n, \ + int n_components, \ + int max_iterations, \ + int ncv, \ + ValueType tolerance, \ + uint64_t seed, \ + ValueType* v0, \ + ValueType* eigenvalues, \ + ValueType* eigenvectors) + +FUNC_DECL(int, float); +FUNC_DECL(int64_t, float); +FUNC_DECL(int, double); +FUNC_DECL(int64_t, double); + +#undef FUNC_DECL + +/** @} */ // end group rmat_runtime + +} // namespace raft::runtime::solver diff --git a/cpp/src/raft_runtime/solver/lanczos_solver.cuh b/cpp/src/raft_runtime/solver/lanczos_solver.cuh new file mode 100644 index 0000000000..142ed589bf --- /dev/null +++ b/cpp/src/raft_runtime/solver/lanczos_solver.cuh @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2024-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include + +template +void run_lanczos_solver(const raft::resources& handle, + IndexType* rows, + IndexType* cols, + ValueType* vals, + int nnz, + int n, + int n_components, + int max_iterations, + int ncv, + ValueType tolerance, + uint64_t seed, + ValueType* v0, + ValueType* eigenvalues, + ValueType* eigenvectors) +{ + auto stream = raft::resource::get_cuda_stream(handle); + raft::device_vector_view rows_view = + raft::make_device_vector_view(rows, n + 1); + raft::device_vector_view cols_view = + raft::make_device_vector_view(cols, nnz); + raft::device_vector_view vals_view = + raft::make_device_vector_view(vals, nnz); + raft::device_vector_view v0_view = + raft::make_device_vector_view(v0, n); + raft::device_vector_view eigenvalues_view = + raft::make_device_vector_view(eigenvalues, n_components); + raft::device_matrix_view eigenvectors_view = + raft::make_device_matrix_view( + eigenvectors, n, n_components); + + raft::spectral::matrix::sparse_matrix_t const csr_m{ + handle, rows_view.data_handle(), cols_view.data_handle(), vals_view.data_handle(), n, nnz}; + raft::sparse::solver::lanczos_solver_config config{ + n_components, max_iterations, ncv, tolerance, seed}; + raft::sparse::solver::lanczos_compute_smallest_eigenvectors( + handle, csr_m, config, v0_view, eigenvalues_view, eigenvectors_view); +} + +#define FUNC_DEF(IndexType, ValueType) \ + void lanczos_solver(const raft::resources& handle, \ + IndexType* rows, \ + IndexType* cols, \ + ValueType* vals, \ + int nnz, \ + int n, \ + int n_components, \ + int max_iterations, \ + int ncv, \ + ValueType tolerance, \ + uint64_t seed, \ + ValueType* v0, \ + ValueType* eigenvalues, \ + ValueType* eigenvectors) \ + { \ + run_lanczos_solver(handle, \ + rows, \ + cols, \ + vals, \ + nnz, \ + n, \ + n_components, \ + max_iterations, \ + ncv, \ + tolerance, \ + seed, \ + v0, \ + eigenvalues, \ + eigenvectors); \ + } diff --git a/cpp/src/raft_runtime/solver/lanczos_solver_int64_double.cu b/cpp/src/raft_runtime/solver/lanczos_solver_int64_double.cu new file mode 100644 index 0000000000..dbc4a0e886 --- /dev/null +++ b/cpp/src/raft_runtime/solver/lanczos_solver_int64_double.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lanczos_solver.cuh" + +namespace raft::runtime::solver { + +FUNC_DEF(int64_t, double); + +} // namespace raft::runtime::solver diff --git a/cpp/src/raft_runtime/solver/lanczos_solver_int64_float.cu b/cpp/src/raft_runtime/solver/lanczos_solver_int64_float.cu new file mode 100644 index 0000000000..b9bea3cf23 --- /dev/null +++ b/cpp/src/raft_runtime/solver/lanczos_solver_int64_float.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lanczos_solver.cuh" + +namespace raft::runtime::solver { + +FUNC_DEF(int64_t, float); + +} // namespace raft::runtime::solver diff --git a/cpp/src/raft_runtime/solver/lanczos_solver_int_double.cu b/cpp/src/raft_runtime/solver/lanczos_solver_int_double.cu new file mode 100644 index 0000000000..3e716396fc --- /dev/null +++ b/cpp/src/raft_runtime/solver/lanczos_solver_int_double.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lanczos_solver.cuh" + +namespace raft::runtime::solver { + +FUNC_DEF(int, double); + +} // namespace raft::runtime::solver diff --git a/cpp/src/raft_runtime/solver/lanczos_solver_int_float.cu b/cpp/src/raft_runtime/solver/lanczos_solver_int_float.cu new file mode 100644 index 0000000000..9f1f0fc67d --- /dev/null +++ b/cpp/src/raft_runtime/solver/lanczos_solver_int_float.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lanczos_solver.cuh" + +namespace raft::runtime::solver { + +FUNC_DEF(int, float); + +} // namespace raft::runtime::solver diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index 6cbe8e4cbf..48f26c4d31 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -71,6 +71,7 @@ add_subdirectory(pylibraft/distance) add_subdirectory(pylibraft/matrix) add_subdirectory(pylibraft/neighbors) add_subdirectory(pylibraft/random) +add_subdirectory(pylibraft/solver) add_subdirectory(pylibraft/cluster) if(DEFINED cython_lib_dir) diff --git a/python/pylibraft/pylibraft/solver/CMakeLists.txt b/python/pylibraft/pylibraft/solver/CMakeLists.txt new file mode 100644 index 0000000000..c9fbd5d0f0 --- /dev/null +++ b/python/pylibraft/pylibraft/solver/CMakeLists.txt @@ -0,0 +1,27 @@ +# ============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources lanczos.pyx) + +# TODO: should finally be replaced with 'compiled' library to be more generic, when that is +# available +set(linked_libraries raft::raft raft::compiled) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX solver_ +) diff --git a/python/pylibraft/pylibraft/solver/__init__.pxd b/python/pylibraft/pylibraft/solver/__init__.pxd new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/pylibraft/pylibraft/solver/__init__.py b/python/pylibraft/pylibraft/solver/__init__.py new file mode 100644 index 0000000000..c418651aca --- /dev/null +++ b/python/pylibraft/pylibraft/solver/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .lanczos import eigsh + +__all__ = ["rmat"] diff --git a/python/pylibraft/pylibraft/solver/cpp/__init__.pxd b/python/pylibraft/pylibraft/solver/cpp/__init__.pxd new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/pylibraft/pylibraft/solver/cpp/__init__.py b/python/pylibraft/pylibraft/solver/cpp/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/pylibraft/pylibraft/solver/lanczos.pyx b/python/pylibraft/pylibraft/solver/lanczos.pyx new file mode 100644 index 0000000000..deb511a707 --- /dev/null +++ b/python/pylibraft/pylibraft/solver/lanczos.pyx @@ -0,0 +1,234 @@ +# +# Copyright (c) 2024-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
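# NOTE: solver/__init__.py above imports eigsh but exports __all__ = ["rmat"],
# which looks like a copy-paste leftover from the pylibraft.random module and
# presumably should read __all__ = ["eigsh"]. The bare print(IndexType,
# ValueType) further down in eigsh likewise looks like leftover debug output.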
+# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import cupy as cp +import numpy as np + +from cython.operator cimport dereference as deref +from libc.stdint cimport int64_t, uint64_t, uintptr_t + +from pylibraft.common import Handle, cai_wrapper, device_ndarray +from pylibraft.common.handle import auto_sync_handle + +from libcpp cimport bool + +from pylibraft.common.handle cimport device_resources +from pylibraft.random.cpp.rng_state cimport RngState + + +cdef extern from "raft_runtime/solver/lanczos.hpp" \ + namespace "raft::runtime::solver" nogil: + + cdef void lanczos_solver( + const device_resources &handle, + int64_t* rows, + int64_t* cols, + double* vals, + int nnz, + int n, + int n_components, + int max_iterations, + int ncv, + double tolerance, + uint64_t seed, + double* v0, + double* eigenvalues, + double* eigenvectors) except + + + cdef void lanczos_solver( + const device_resources &handle, + int64_t* rows, + int64_t* cols, + float* vals, + int nnz, + int n, + int n_components, + int max_iterations, + int ncv, + float tolerance, + uint64_t seed, + float* v0, + float* eigenvalues, + float* eigenvectors) except + + + cdef void lanczos_solver( + const device_resources &handle, + int* rows, + int* cols, + double* vals, + int nnz, + int n, + int n_components, + int max_iterations, + int ncv, + double tolerance, + uint64_t seed, + double* v0, + double* eigenvalues, + double* eigenvectors) except + + + cdef void lanczos_solver( + const device_resources &handle, + int* rows, + int* cols, + float* vals, + int nnz, + int n, + int n_components, + int max_iterations, + int ncv, + float tolerance, + uint64_t seed, + float* v0, + float* eigenvalues, + float* eigenvectors) except + + + +@auto_sync_handle +def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, + tol=0, seed=None, handle=None): + + if A is None: + raise Exception("'A' cannot be None!") + + rows = A.indptr + cols = A.indices + vals = A.data + + rows = cai_wrapper(rows) + cols = cai_wrapper(cols) + vals = cai_wrapper(vals) + + IndexType = rows.dtype + ValueType = vals.dtype + + N = A.shape[0] + n = N + + rows_ptr = rows.data + cols_ptr = cols.data + vals_ptr = vals.data + + if ncv is None: + # ncv = min(max(2 * k, k + 32), n - 1) + ncv = min(n, max(2*k + 1, 20)) + else: + ncv = min(max(ncv, k + 2), n - 1) + + seed = seed if seed is not None else 42 + if maxiter is None: + maxiter = 10 * n + if tol == 0: + tol = np.finfo(ValueType).eps + + if v0 is None: + rng = np.random.default_rng(seed) + v0 = rng.random((N,)).astype(vals.dtype) + + v0 = cai_wrapper(v0) + v0_ptr = v0.data + + eigenvectors = device_ndarray.empty((N, k), dtype=ValueType, order='F') + eigenvalues = device_ndarray.empty((k), dtype=ValueType, order='F') + + eigenvectors_cai = cai_wrapper(eigenvectors) + eigenvalues_cai = cai_wrapper(eigenvalues) + + eigenvectors_ptr = eigenvectors_cai.data + eigenvalues_ptr = eigenvalues_cai.data + + handle = handle if handle is not None else Handle() + cdef device_resources *h = handle.getHandle() + + print(IndexType, ValueType) + + if IndexType == np.int32 and ValueType == np.float32: + lanczos_solver( + deref(h), + rows_ptr, + cols_ptr, + vals_ptr, + A.nnz, + N, + k, + maxiter, + ncv, + tol, + seed, + v0_ptr, + eigenvalues_ptr, + eigenvectors_ptr, + ) + elif IndexType == np.int64 and ValueType == np.float32: + lanczos_solver( + deref(h), + rows_ptr, + cols_ptr, + vals_ptr, + A.nnz, + N, + k, + maxiter, + ncv, + tol, + seed, + v0_ptr, + eigenvalues_ptr, + 
eigenvectors_ptr, + ) + elif IndexType == np.int32 and ValueType == np.float64: + lanczos_solver( + deref(h), + rows_ptr, + cols_ptr, + vals_ptr, + A.nnz, + N, + k, + maxiter, + ncv, + tol, + seed, + v0_ptr, + eigenvalues_ptr, + eigenvectors_ptr, + ) + elif IndexType == np.int64 and ValueType == np.float64: + lanczos_solver( + deref(h), + rows_ptr, + cols_ptr, + vals_ptr, + A.nnz, + N, + k, + maxiter, + ncv, + tol, + seed, + v0_ptr, + eigenvalues_ptr, + eigenvectors_ptr, + ) + else: + raise ValueError("dtype IndexType=%s and ValueType=%s not supported" % + (IndexType, ValueType)) + + return (cp.asarray(eigenvalues), cp.asarray(eigenvectors)) From 7240937f70a3196452b202edacb6d7cd6132a109 Mon Sep 17 00:00:00 2001 From: aamijar Date: Wed, 21 Aug 2024 20:47:45 +0000 Subject: [PATCH 05/23] gtest --- cpp/test/CMakeLists.txt | 2 +- cpp/test/sparse/solver/lanczos.cu | 597 ++++++++++++++++++++++++++++++ 2 files changed, 598 insertions(+), 1 deletion(-) create mode 100644 cpp/test/sparse/solver/lanczos.cu diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index e3af6ebb78..46041977b7 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -301,7 +301,7 @@ if(BUILD_TESTS) ) ConfigureTest( - NAME SOLVERS_TEST PATH cluster/cluster_solvers_deprecated.cu linalg/eigen_solvers.cu + NAME SOLVERS_TEST PATH cluster/cluster_solvers_deprecated.cu linalg/eigen_solvers.cu sparse/solver/lanczos.cu lap/lap.cu sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY ) diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/test/sparse/solver/lanczos.cu new file mode 100644 index 0000000000..fdff55ed12 --- /dev/null +++ b/cpp/test/sparse/solver/lanczos.cu @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../../test_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace raft { +namespace sparse { + +template +struct lanczos_inputs { + int n_components; + int restartiter; + int maxiter; + int conv_n_iters; + float conv_eps; + float tol; + uint64_t seed; + std::vector rows; // indptr + std::vector cols; // indices + std::vector vals; // data + std::vector expected_eigenvalues; +}; + +template +struct rmat_lanczos_inputs { + int n_components; + int restartiter; + int maxiter; + int conv_n_iters; + float conv_eps; + float tol; + uint64_t seed; + int r_scale; + int c_scale; + float sparsity; + std::vector expected_eigenvalues; +}; + +template +class dummy_lanczos_tests + : public ::testing::TestWithParam> {}; + +template +class rmat_lanczos_tests + : public ::testing::TestWithParam> { + public: + rmat_lanczos_tests() + : params(::testing::TestWithParam>::GetParam()), + stream(resource::get_cuda_stream(handle)), + rng(params.seed), + expected_eigenvalues(raft::make_device_vector( + handle, params.n_components)), + r_scale(params.r_scale), + c_scale(params.c_scale), + sparsity(params.sparsity) + { + } + + protected: + void SetUp() override + { + raft::copy(expected_eigenvalues.data_handle(), + params.expected_eigenvalues.data(), + params.n_components, + stream); + } + + void TearDown() override {} + + void Run() + { + uint64_t n_edges = sparsity * ((long long)(1 << r_scale) * (long long)(1 << c_scale)); + uint64_t n_nodes = 1 << std::max(r_scale, c_scale); + uint64_t theta_len = std::max(r_scale, c_scale) * 4; + + raft::device_vector theta = + raft::make_device_vector(handle, theta_len); + raft::random::uniform(handle, rng, theta.view(), 0, 1); + + raft::device_matrix out = + raft::make_device_matrix(handle, n_edges * 2, 2); + raft::device_vector out_src = + raft::make_device_vector(handle, n_edges); + raft::device_vector out_dst = + raft::make_device_vector(handle, n_edges); + + raft::random::rmat_rectangular_gen(handle, + rng, + make_const_mdspan(theta.view()), + out.view(), + out_src.view(), + out_dst.view(), + r_scale, + c_scale); + + raft::device_vector out_data = + raft::make_device_vector(handle, n_edges); + raft::matrix::fill(handle, out_data.view(), 1.0); + raft::sparse::COO coo(stream); + + raft::sparse::op::coo_sort(n_nodes, + n_nodes, + n_edges, + out_src.data_handle(), + out_dst.data_handle(), + out_data.data_handle(), + stream); + raft::sparse::op::max_duplicates(handle, + coo, + out_src.data_handle(), + out_dst.data_handle(), + out_data.data_handle(), + n_edges, + n_nodes, + n_nodes); + + raft::sparse::COO symmetric_coo(stream); + raft::sparse::linalg::symmetrize( + handle, coo.rows(), coo.cols(), coo.vals(), coo.n_rows, coo.n_cols, coo.nnz, symmetric_coo); + + raft::device_vector row_indices = + raft::make_device_vector(handle, + symmetric_coo.n_rows + 1); + raft::sparse::convert::sorted_coo_to_csr(symmetric_coo.rows(), + symmetric_coo.nnz, + row_indices.data_handle(), + symmetric_coo.n_rows + 1, + stream); + + int n_components = params.n_components; + + raft::device_vector v0 = + raft::make_device_vector(handle, symmetric_coo.n_rows); + + raft::random::uniform(handle, rng, v0.view(), 0, 1); + // raft::spectral::matrix::sparse_matrix_t const csr_m{handle, + // row_indices.data_handle(), symmetric_coo.cols(), symmetric_coo.vals(), 
symmetric_coo.n_rows, + // symmetric_coo.nnz}; raft::spectral::eigen_solver_config_t + // cfg{n_components, params.maxiter, params.restartiter, params.tol, false, rng.seed}; + std::tuple stats; + // raft::spectral::lanczos_solver_t eigen_solver{cfg}; + + raft::device_vector eigenvalues = + raft::make_device_vector(handle, n_components); + raft::device_matrix eigenvectors = + raft::make_device_matrix( + handle, symmetric_coo.n_rows, n_components); + + raft::spectral::matrix::sparse_matrix_t const csr_m{ + handle, + row_indices.data_handle(), + symmetric_coo.cols(), + symmetric_coo.vals(), + symmetric_coo.n_rows, + symmetric_coo.nnz}; + raft::sparse::solver::lanczos_solver_config config{ + n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; + std::get<0>(stats) = + raft::sparse::solver::lanczos_compute_smallest_eigenvectors( + handle, csr_m, config, v0.view(), eigenvalues.view(), eigenvectors.view()); + + // std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, csr_m, + // eigenvalues.data_handle(), eigenvectors.data_handle(), v0.data_handle()); + + ASSERT_TRUE(raft::devArrMatch(eigenvalues.data_handle(), + expected_eigenvalues.data_handle(), + n_components, + raft::CompareApprox(1e-5), + stream)); + } + + protected: + rmat_lanczos_inputs params; + raft::resources handle; + cudaStream_t stream; + raft::random::RngState rng; + int r_scale; + int c_scale; + float sparsity; + raft::device_vector expected_eigenvalues; +}; + +template +class lanczos_tests : public ::testing::TestWithParam> { + public: + lanczos_tests() + : params(::testing::TestWithParam>::GetParam()), + stream(resource::get_cuda_stream(handle)), + n(params.rows.size() - 1), + nnz(params.vals.size()), + rng(params.seed), + rows(raft::make_device_vector(handle, n + 1)), + cols(raft::make_device_vector(handle, nnz)), + vals(raft::make_device_vector(handle, nnz)), + v0(raft::make_device_vector(handle, n)), + eigenvalues(raft::make_device_vector( + handle, params.n_components)), + eigenvectors(raft::make_device_matrix( + handle, n, params.n_components)), + expected_eigenvalues(raft::make_device_vector( + handle, params.n_components)) + { + } + + protected: + void SetUp() override + { + raft::copy(rows.data_handle(), params.rows.data(), n + 1, stream); + raft::copy(cols.data_handle(), params.cols.data(), nnz, stream); + raft::copy(vals.data_handle(), params.vals.data(), nnz, stream); + raft::copy(expected_eigenvalues.data_handle(), + params.expected_eigenvalues.data(), + params.n_components, + stream); + } + + void TearDown() override {} + + void Run() + { + raft::random::uniform(handle, rng, v0.view(), 0, 1); + // raft::spectral::matrix::sparse_matrix_t const csr_m{handle, + // rows.data_handle(), cols.data_handle(), vals.data_handle(), n, nnz}; + // raft::spectral::eigen_solver_config_t cfg{params.n_components, + // params.maxiter, params.restartiter, params.tol, false, params.seed}; + std::tuple stats; + // raft::spectral::lanczos_solver_t eigen_solver{cfg}; + + raft::spectral::matrix::sparse_matrix_t const csr_m{ + handle, rows.data_handle(), cols.data_handle(), vals.data_handle(), n, nnz}; + raft::sparse::solver::lanczos_solver_config config{ + params.n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; + std::get<0>(stats) = + raft::sparse::solver::lanczos_compute_smallest_eigenvectors( + handle, csr_m, config, v0.view(), eigenvalues.view(), eigenvectors.view()); + + // std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, csr_m, + // 
eigenvalues.data_handle(), eigenvectors.data_handle(), v0.data_handle()); + + ASSERT_TRUE(raft::devArrMatch(eigenvalues.data_handle(), + expected_eigenvalues.data_handle(), + params.n_components, + raft::CompareApprox(1e-5), + stream)); + } + + protected: + lanczos_inputs params; + raft::resources handle; + cudaStream_t stream; + int n; + int nnz; + raft::random::RngState rng; + raft::device_vector rows; + raft::device_vector cols; + raft::device_vector vals; + raft::device_vector v0; + raft::device_vector eigenvalues; + raft::device_matrix eigenvectors; + raft::device_vector expected_eigenvalues; +}; + +const std::vector> inputsf = { + {2, + 34, + 10000, + 0, + 0, + 1e-15, + 42, + {0, 0, 0, 0, 3, 5, 6, 8, 9, 11, 16, 16, 18, 20, 23, 24, 27, + 30, 31, 33, 37, 37, 39, 41, 43, 44, 46, 46, 47, 49, 50, 50, 51, 53, + 57, 58, 59, 66, 67, 68, 69, 71, 72, 75, 78, 83, 86, 90, 93, 94, 96, + 98, 99, 101, 101, 104, 106, 108, 109, 109, 109, 109, 111, 113, 118, 120, 121, 123, + 124, 128, 132, 134, 136, 138, 139, 141, 145, 148, 151, 152, 154, 155, 157, 160, 164, + 167, 170, 170, 170, 173, 178, 179, 182, 184, 186, 191, 192, 196, 198, 198, 198}, + {44, 68, 74, 16, 36, 85, 34, 75, 61, 51, 83, 15, 33, 55, 69, 71, 18, 84, 70, 95, 71, 83, + 97, 83, 9, 36, 54, 4, 42, 46, 52, 11, 89, 31, 37, 74, 96, 36, 88, 56, 64, 68, 94, 82, + 35, 90, 50, 82, 85, 83, 19, 47, 94, 9, 44, 56, 79, 6, 25, 4, 15, 21, 52, 75, 79, 92, + 19, 72, 94, 94, 96, 80, 16, 54, 89, 46, 48, 63, 3, 33, 67, 73, 77, 46, 47, 75, 16, 43, + 45, 81, 32, 45, 68, 43, 55, 63, 27, 89, 8, 17, 36, 15, 42, 96, 9, 49, 22, 33, 77, 7, + 75, 78, 88, 43, 49, 66, 76, 91, 22, 82, 69, 63, 84, 44, 3, 23, 47, 81, 9, 65, 76, 92, + 12, 96, 9, 13, 38, 93, 44, 3, 19, 6, 36, 45, 61, 63, 69, 89, 44, 57, 94, 62, 33, 36, + 41, 46, 68, 24, 28, 64, 8, 13, 14, 29, 11, 66, 88, 5, 28, 93, 21, 62, 84, 18, 42, 50, + 76, 91, 25, 63, 89, 97, 36, 69, 72, 85, 23, 32, 39, 40, 77, 12, 19, 40, 54, 70, 13, 91}, + {0.4734894, 0.1402491, 0.7686475, 0.0416142, 0.2559651, 0.9360436, 0.7486080, 0.5206724, + 0.0374126, 0.8082515, 0.5993828, 0.4866583, 0.8907925, 0.9251201, 0.8566143, 0.9528994, + 0.4557763, 0.4907070, 0.4158074, 0.8311127, 0.9026024, 0.3103237, 0.5876446, 0.7585195, + 0.4866583, 0.4493615, 0.5909155, 0.0416142, 0.0963910, 0.6722401, 0.3468698, 0.4557763, + 0.1445242, 0.7720124, 0.9923756, 0.1227579, 0.7194629, 0.8916773, 0.4320931, 0.5840980, + 0.0216121, 0.3709223, 0.1705930, 0.8297898, 0.2409706, 0.9585592, 0.3171389, 0.0228039, + 0.4350971, 0.4939908, 0.7720124, 0.2722416, 0.1792683, 0.8907925, 0.1085757, 0.8745620, + 0.3298612, 0.7486080, 0.2409706, 0.2559651, 0.4493615, 0.8916773, 0.5540361, 0.5150571, + 0.9160119, 0.1767728, 0.9923756, 0.5717281, 0.1077409, 0.9368132, 0.6273088, 0.6616613, + 0.0963910, 0.9378265, 0.3059566, 0.3159291, 0.0449106, 0.9085807, 0.4734894, 0.1085757, + 0.2909013, 0.7787509, 0.7168902, 0.9691764, 0.2669757, 0.4389115, 0.6722401, 0.3159291, + 0.9691764, 0.7467896, 0.2722416, 0.2669757, 0.1532843, 0.0449106, 0.2023634, 0.8934466, + 0.3171389, 0.6594226, 0.8082515, 0.3468698, 0.5540361, 0.5909155, 0.9378265, 0.2909178, + 0.9251201, 0.2023634, 0.5840980, 0.8745620, 0.2624605, 0.0374126, 0.1034030, 0.3736577, + 0.3315690, 0.9085807, 0.8934466, 0.5548525, 0.2302140, 0.7827352, 0.0216121, 0.8262919, + 0.1646078, 0.5548525, 0.2658700, 0.2909013, 0.1402491, 0.3709223, 0.1532843, 0.5792196, + 0.8566143, 0.1646078, 0.0827300, 0.5810611, 0.4158074, 0.5188584, 0.9528994, 0.9026024, + 0.5717281, 0.7269946, 0.7787509, 0.7686475, 0.1227579, 0.5206724, 0.5150571, 
0.4389115, + 0.1034030, 0.2302140, 0.0827300, 0.8961608, 0.7168902, 0.2624605, 0.4823034, 0.3736577, + 0.3298612, 0.9160119, 0.6616613, 0.7467896, 0.5792196, 0.8297898, 0.0228039, 0.8262919, + 0.5993828, 0.3103237, 0.7585195, 0.4939908, 0.4907070, 0.2658700, 0.0844443, 0.9360436, + 0.4350971, 0.6997072, 0.4320931, 0.3315690, 0.0844443, 0.1445242, 0.3059566, 0.6594226, + 0.8961608, 0.6498466, 0.9585592, 0.7827352, 0.6498466, 0.2812338, 0.1767728, 0.5810611, + 0.7269946, 0.6997072, 0.1705930, 0.1792683, 0.1077409, 0.9368132, 0.4823034, 0.8311127, + 0.7194629, 0.6273088, 0.2909178, 0.5188584, 0.5876446, 0.2812338}, + {-2.0369630, -1.7673520}}}; + +const std::vector> inputsd = { + {2, + 34, + 10000, + 0, + 0, + 1e-15, + 42, + {0, 0, 0, 0, 3, 5, 6, 8, 9, 11, 16, 16, 18, 20, 23, 24, 27, + 30, 31, 33, 37, 37, 39, 41, 43, 44, 46, 46, 47, 49, 50, 50, 51, 53, + 57, 58, 59, 66, 67, 68, 69, 71, 72, 75, 78, 83, 86, 90, 93, 94, 96, + 98, 99, 101, 101, 104, 106, 108, 109, 109, 109, 109, 111, 113, 118, 120, 121, 123, + 124, 128, 132, 134, 136, 138, 139, 141, 145, 148, 151, 152, 154, 155, 157, 160, 164, + 167, 170, 170, 170, 173, 178, 179, 182, 184, 186, 191, 192, 196, 198, 198, 198}, + {44, 68, 74, 16, 36, 85, 34, 75, 61, 51, 83, 15, 33, 55, 69, 71, 18, 84, 70, 95, 71, 83, + 97, 83, 9, 36, 54, 4, 42, 46, 52, 11, 89, 31, 37, 74, 96, 36, 88, 56, 64, 68, 94, 82, + 35, 90, 50, 82, 85, 83, 19, 47, 94, 9, 44, 56, 79, 6, 25, 4, 15, 21, 52, 75, 79, 92, + 19, 72, 94, 94, 96, 80, 16, 54, 89, 46, 48, 63, 3, 33, 67, 73, 77, 46, 47, 75, 16, 43, + 45, 81, 32, 45, 68, 43, 55, 63, 27, 89, 8, 17, 36, 15, 42, 96, 9, 49, 22, 33, 77, 7, + 75, 78, 88, 43, 49, 66, 76, 91, 22, 82, 69, 63, 84, 44, 3, 23, 47, 81, 9, 65, 76, 92, + 12, 96, 9, 13, 38, 93, 44, 3, 19, 6, 36, 45, 61, 63, 69, 89, 44, 57, 94, 62, 33, 36, + 41, 46, 68, 24, 28, 64, 8, 13, 14, 29, 11, 66, 88, 5, 28, 93, 21, 62, 84, 18, 42, 50, + 76, 91, 25, 63, 89, 97, 36, 69, 72, 85, 23, 32, 39, 40, 77, 12, 19, 40, 54, 70, 13, 91}, + {0.4734894, 0.1402491, 0.7686475, 0.0416142, 0.2559651, 0.9360436, 0.7486080, 0.5206724, + 0.0374126, 0.8082515, 0.5993828, 0.4866583, 0.8907925, 0.9251201, 0.8566143, 0.9528994, + 0.4557763, 0.4907070, 0.4158074, 0.8311127, 0.9026024, 0.3103237, 0.5876446, 0.7585195, + 0.4866583, 0.4493615, 0.5909155, 0.0416142, 0.0963910, 0.6722401, 0.3468698, 0.4557763, + 0.1445242, 0.7720124, 0.9923756, 0.1227579, 0.7194629, 0.8916773, 0.4320931, 0.5840980, + 0.0216121, 0.3709223, 0.1705930, 0.8297898, 0.2409706, 0.9585592, 0.3171389, 0.0228039, + 0.4350971, 0.4939908, 0.7720124, 0.2722416, 0.1792683, 0.8907925, 0.1085757, 0.8745620, + 0.3298612, 0.7486080, 0.2409706, 0.2559651, 0.4493615, 0.8916773, 0.5540361, 0.5150571, + 0.9160119, 0.1767728, 0.9923756, 0.5717281, 0.1077409, 0.9368132, 0.6273088, 0.6616613, + 0.0963910, 0.9378265, 0.3059566, 0.3159291, 0.0449106, 0.9085807, 0.4734894, 0.1085757, + 0.2909013, 0.7787509, 0.7168902, 0.9691764, 0.2669757, 0.4389115, 0.6722401, 0.3159291, + 0.9691764, 0.7467896, 0.2722416, 0.2669757, 0.1532843, 0.0449106, 0.2023634, 0.8934466, + 0.3171389, 0.6594226, 0.8082515, 0.3468698, 0.5540361, 0.5909155, 0.9378265, 0.2909178, + 0.9251201, 0.2023634, 0.5840980, 0.8745620, 0.2624605, 0.0374126, 0.1034030, 0.3736577, + 0.3315690, 0.9085807, 0.8934466, 0.5548525, 0.2302140, 0.7827352, 0.0216121, 0.8262919, + 0.1646078, 0.5548525, 0.2658700, 0.2909013, 0.1402491, 0.3709223, 0.1532843, 0.5792196, + 0.8566143, 0.1646078, 0.0827300, 0.5810611, 0.4158074, 0.5188584, 0.9528994, 0.9026024, + 0.5717281, 0.7269946, 0.7787509, 
0.7686475, 0.1227579, 0.5206724, 0.5150571, 0.4389115, + 0.1034030, 0.2302140, 0.0827300, 0.8961608, 0.7168902, 0.2624605, 0.4823034, 0.3736577, + 0.3298612, 0.9160119, 0.6616613, 0.7467896, 0.5792196, 0.8297898, 0.0228039, 0.8262919, + 0.5993828, 0.3103237, 0.7585195, 0.4939908, 0.4907070, 0.2658700, 0.0844443, 0.9360436, + 0.4350971, 0.6997072, 0.4320931, 0.3315690, 0.0844443, 0.1445242, 0.3059566, 0.6594226, + 0.8961608, 0.6498466, 0.9585592, 0.7827352, 0.6498466, 0.2812338, 0.1767728, 0.5810611, + 0.7269946, 0.6997072, 0.1705930, 0.1792683, 0.1077409, 0.9368132, 0.4823034, 0.8311127, + 0.7194629, 0.6273088, 0.2909178, 0.5188584, 0.5876446, 0.2812338}, + {-2.0369630, -1.7673520}}}; + +const std::vector> rmat_inputsf = { + {50, + 100, + 10000, + 0, + 0, + 1e-9, + 42, + 12, + 12, + 1, + // {-122.53275 , -74.009415, -59.70774 , -54.678654, -49.700565, + // -34.015884, -32.097626, -31.29491 , -30.33276 , -22.899527, + // -20.49083 , -20.243006, -19.26677 , -18.43743 , -17.671614, + // -17.00962 , -16.72859 , -15.812017, -15.744598, -15.438096, + // -15.030397, -14.721282, -14.146572, -13.959946, -13.640783, + // -13.475106, -13.200468, -12.769644, -12.630838, -12.570684, + // -12.290903, -12.042329, -11.678847, -11.563247, -11.185609, + // -10.919437, -10.785621, -10.566719, -10.202344, -10.014745, + // -9.602258, -9.511378, -9.268343, -8.876679, -8.805339, + // -8.670585, -8.471375, -8.391085, -8.197367, -8.014922} + {-122.53162, -74.046684, -59.7358, -54.70629, -49.729855, -33.997437, -32.072914, -31.306896, + -30.339314, -22.891956, -20.453482, -20.1798, -19.275993, -18.436245, -17.653976, -17.008162, + -16.73615, -15.846376, -15.681458, -15.459055, -15.053776, -14.731912, -14.132045, -13.951516, + -13.603188, -13.477833, -13.191872, -12.771893, -12.634565, -12.592889, -12.280662, -12.032298, + -11.667132, -11.555687, -11.176134, -10.875261, -10.79131, -10.538387, -10.242246, -9.957915, + -9.627112, -9.508455, -9.254543, -8.852059, -8.82837, -8.712086, -8.445032, -8.385991, + -8.194637, -7.9835095}}}; + +const std::vector> rmat_inputsd = { + {50, 100, 10000, 0, 0, 1e-9, 42, 12, 12, 1, {-122.53275, -74.009415, -59.70774, -54.678654, + -49.700565, -34.015884, -32.097626, -31.29491, + -30.33276, -22.899527, -20.49083, -20.243006, + -19.26677, -18.43743, -17.671614, -17.00962, + -16.72859, -15.812017, -15.744598, -15.438096, + -15.030397, -14.721282, -14.146572, -13.959946, + -13.640783, -13.475106, -13.200468, -12.769644, + -12.630838, -12.570684, -12.290903, -12.042329, + -11.678847, -11.563247, -11.185609, -10.919437, + -10.785621, -10.566719, -10.202344, -10.014745, + -9.602258, -9.511378, -9.268343, -8.876679, + -8.805339, -8.670585, -8.471375, -8.391085, + -8.197367, -8.014922}}}; + +using LanczosTestF = lanczos_tests; +TEST_P(LanczosTestF, Result) { Run(); } + +using LanczosTestD = lanczos_tests; +TEST_P(LanczosTestD, Result) { Run(); } + +using RmatLanczosTestF = rmat_lanczos_tests; +TEST_P(RmatLanczosTestF, Result) { Run(); } + +// using RmatLanczosTestD = rmat_lanczos_tests; +// TEST_P(RmatLanczosTestD, Result) +// { +// Run(); +// } + +template +void save_vectors(const std::string& filename, + const std::vector& rows, + const std::vector& cols, + const std::vector& vals) +{ + std::ofstream out(filename, std::ios::binary); + + // Save the size of each vector + size_t size_rows = rows.size(); + size_t size_cols = cols.size(); + size_t size_vals = vals.size(); + + out.write(reinterpret_cast(&size_rows), sizeof(size_rows)); + out.write(reinterpret_cast(&size_cols), sizeof(size_cols)); 
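+  // A matching loader for this layout is sketched below for reference; it is
+  // a sketch, not part of the test, and assumes the reader runs on a machine
+  // with the same endianness and sizeof(size_t) as the writer (`in`, and the
+  // destination vectors rows/cols/vals, are the hypothetical counterparts of
+  // the writer's names):
+  //   std::ifstream in(filename, std::ios::binary);
+  //   size_t size_rows, size_cols, size_vals;
+  //   in.read(reinterpret_cast<char*>(&size_rows), sizeof(size_rows));
+  //   in.read(reinterpret_cast<char*>(&size_cols), sizeof(size_cols));
+  //   in.read(reinterpret_cast<char*>(&size_vals), sizeof(size_vals));
+  //   rows.resize(size_rows); cols.resize(size_cols); vals.resize(size_vals);
+  //   in.read(reinterpret_cast<char*>(rows.data()), size_rows * sizeof(index_type));
+  //   in.read(reinterpret_cast<char*>(cols.data()), size_cols * sizeof(index_type));
+  //   in.read(reinterpret_cast<char*>(vals.data()), size_vals * sizeof(value_type));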
+ out.write(reinterpret_cast(&size_vals), sizeof(size_vals)); + + // Save the vectors + out.write(reinterpret_cast(rows.data()), size_rows * sizeof(index_type)); + out.write(reinterpret_cast(cols.data()), size_cols * sizeof(index_type)); + out.write(reinterpret_cast(vals.data()), size_vals * sizeof(value_type)); + + out.close(); +} + +using DummyLanczosTest = dummy_lanczos_tests; +TEST_P(DummyLanczosTest, Result) +{ + raft::resources handle; + cudaStream_t stream = resource::get_cuda_stream(handle); + raft::random::RngState rng(42); + + using index_type = int; + using value_type = float; + int r_scale = 12; + int c_scale = 12; + float sparsity = 1; + uint64_t n_edges = sparsity * ((long long)(1 << r_scale) * (long long)(1 << c_scale)); + uint64_t n_nodes = 1 << std::max(r_scale, c_scale); + uint64_t theta_len = std::max(r_scale, c_scale) * 4; + + std::cout << "n_edges" << n_edges << std::endl; + std::cout << "n_nodes" << n_nodes << std::endl; + + raft::device_vector theta = + raft::make_device_vector(handle, theta_len); + raft::random::uniform(handle, rng, theta.view(), 0, 1); + // print_device_vector("theta", theta.data_handle(), theta_len, std::cout); + + raft::device_matrix out = + raft::make_device_matrix(handle, n_edges * 2, 2); + + raft::device_vector out_src = + raft::make_device_vector(handle, n_edges); + raft::device_vector out_dst = + raft::make_device_vector(handle, n_edges); + + raft::random::rmat_rectangular_gen(handle, + rng, + make_const_mdspan(theta.view()), + out.view(), + out_src.view(), + out_dst.view(), + r_scale, + c_scale); + + // print_device_vector("out", out.data_handle(), n_edges*2, std::cout); + // print_device_vector("out_src", out_src.data_handle(), n_edges, std::cout); + // print_device_vector("out_dst", out_dst.data_handle(), n_edges, std::cout); + + raft::device_vector out_data = + raft::make_device_vector(handle, n_edges); + raft::matrix::fill(handle, out_data.view(), 1.0F); + raft::sparse::COO coo(stream); + + raft::sparse::op::coo_sort(n_nodes, + n_nodes, + n_edges, + out_src.data_handle(), + out_dst.data_handle(), + out_data.data_handle(), + stream); + raft::sparse::op::max_duplicates(handle, + coo, + out_src.data_handle(), + out_dst.data_handle(), + out_data.data_handle(), + n_edges, + n_nodes, + n_nodes); + + // print_device_vector("coo_rows", coo.rows(), coo.nnz, std::cout); + // print_device_vector("coo_cols", coo.cols(), coo.nnz, std::cout); + // print_device_vector("coo_vals", coo.vals(), coo.nnz, std::cout); + + // print_device_vector("csr_row_indices", row_indices.data_handle(), coo.n_rows + 1, std::cout); + + raft::sparse::COO symmetric_coo(stream); + raft::sparse::linalg::symmetrize( + handle, coo.rows(), coo.cols(), coo.vals(), coo.n_rows, coo.n_cols, coo.nnz, symmetric_coo); + + raft::device_vector row_indices = + raft::make_device_vector(handle, + symmetric_coo.n_rows + 1); + raft::sparse::convert::sorted_coo_to_csr(symmetric_coo.rows(), + symmetric_coo.nnz, + row_indices.data_handle(), + symmetric_coo.n_rows + 1, + stream); + + // print_device_vector("sym_coo_rows", symmetric_coo.rows(), symmetric_coo.nnz, std::cout); + // print_device_vector("sym_coo_cols", symmetric_coo.cols(), symmetric_coo.nnz, std::cout); + // print_device_vector("sym_coo_vals", symmetric_coo.vals(), symmetric_coo.nnz, std::cout); + + std::vector rowsH(symmetric_coo.n_rows + 1); + std::vector colsH(symmetric_coo.nnz); + std::vector valsH(symmetric_coo.nnz); + raft::copy(rowsH.data(), row_indices.data_handle(), symmetric_coo.n_rows + 1, stream); + 
raft::copy(colsH.data(), symmetric_coo.cols(), symmetric_coo.nnz, stream); + raft::copy(valsH.data(), symmetric_coo.vals(), symmetric_coo.nnz, stream); + + save_vectors("sparse.bin", rowsH, colsH, valsH); +} + +INSTANTIATE_TEST_CASE_P(LanczosTests, LanczosTestF, ::testing::ValuesIn(inputsf)); +INSTANTIATE_TEST_CASE_P(LanczosTests, LanczosTestD, ::testing::ValuesIn(inputsd)); +INSTANTIATE_TEST_CASE_P(LanczosTests, RmatLanczosTestF, ::testing::ValuesIn(rmat_inputsf)); +// INSTANTIATE_TEST_CASE_P(LanczosTests, RmatLanczosTestD, ::testing::ValuesIn(rmat_inputsd)); + +INSTANTIATE_TEST_CASE_P(LanczosTests, DummyLanczosTest, ::testing::ValuesIn(inputsf)); + +} // namespace sparse +} // namespace raft From a4cdc7a2804a9d9871529e53f06143ecd06febc6 Mon Sep 17 00:00:00 2001 From: aamijar Date: Wed, 21 Aug 2024 23:22:28 +0000 Subject: [PATCH 06/23] clean up code --- .../raft/sparse/solver/detail/lanczos.cuh | 537 +----------------- 1 file changed, 8 insertions(+), 529 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 73cd84686f..ade2571a3d 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -1461,18 +1461,6 @@ RAFT_KERNEL kernel_get_last_row(const T* M, T* S, int numRows, int numCols) template RAFT_KERNEL kernel_triangular_populate(T* M, const T* beta, int n) { - // int row = blockIdx.x * blockDim.x + threadIdx.x; - // if (row < n) { - // // Upper diagonal - // if (row < n - 1) { - // M[row * n + (row + 1)] = beta[row]; - // } - - // // Lower diagonal - // if (row > 0) { - // M[row * n + (row - 1)] = beta[row - 1]; - // } - // } int row = blockIdx.x * blockDim.x + threadIdx.x; if (row < n) { @@ -1487,13 +1475,6 @@ RAFT_KERNEL kernel_triangular_populate(T* M, const T* beta, int n) template RAFT_KERNEL kernel_triangular_beta_k(T* t, const T* beta_k, int k, int n) { - // int tid = threadIdx.x + blockIdx.x * blockDim.x; - // if (tid < k) { - // // Update the k-th row - // t[k * n + tid] = beta_k[tid]; - // // Update the k-th column - // t[tid * n + k] = beta_k[tid]; - // } int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < k) { @@ -1508,16 +1489,6 @@ RAFT_KERNEL kernel_triangular_beta_k(T* t, const T* beta_k, int k, int n) template RAFT_KERNEL kernel_normalize(const T* u, const T* beta, int j, int n, T* v, T* V, int size) { - // FIXME: custom cuda kernel vs raft primitives? - // # Normalize - // _kernel_normalize(u, beta, i, n, v, V) - - // _kernel_normalize = cupy.ElementwiseKernel( - // 'T u, raw S beta, int32 j, int32 n', 'T v, raw T V', - // 'v = u / beta[j]; V[i + (j+1) * n] = v;', 'cupy_eigsh_normalize') - - // v = u / beta[j]; - // V[i + (j+1) * n] = v; int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) { @@ -1555,47 +1526,6 @@ void cupy_solve_ritz( raft::device_matrix_view eigenvectors, raft::device_vector_view eigenvalues) { - // # Note: This is done on the CPU, because there is an issue in - // # cupy.linalg.eigh with CUDA 9.2, which can return NaNs. It will has little - // # impact on performance, since the matrix size processed here is not large. 
- // alpha = cupy.asnumpy(alpha) - // beta = cupy.asnumpy(beta) - // t = numpy.diag(alpha) - // t = t + numpy.diag(beta[:-1], k=1) - // t = t + numpy.diag(beta[:-1], k=-1) - // if beta_k is not None: - // beta_k = cupy.asnumpy(beta_k) - // t[k, :k] = beta_k - // t[:k, k] = beta_k - // w, s = numpy.linalg.eigh(t) - - // # Pick-up k ritz-values and ritz-vectors - // if which == 'LA': - // idx = numpy.argsort(w) - // wk = w[idx[-k:]] - // sk = s[:, idx[-k:]] - // elif which == 'LM': - // idx = numpy.argsort(numpy.absolute(w)) - // wk = w[idx[-k:]] - // sk = s[:, idx[-k:]] - - // elif which == 'SA': - // idx = numpy.argsort(w) - // wk = w[idx[:k]] - // sk = s[:, idx[:k]] - // # elif which == 'SM': #dysfunctional - // # idx = cupy.argsort(abs(w)) - // # wk = w[idx[:k]] - // # sk = s[:,idx[:k]] - // return cupy.array(wk), cupy.array(sk) - - // FIXME: select the deterministic mode handle? - // cusolverStatus_t - // cusolverDnSetDeterministicMode(cusolverDnHandle_t handle, cusolverDeterministicMode_t mode) - - // FIXME: use public raft apis instead of using detail - - // add some primitives to create triangular dense matrix? auto stream = resource::get_cuda_stream(handle); value_type_t zero = 0; @@ -1606,18 +1536,11 @@ void cupy_solve_ritz( raft::matrix::initializeDiagonalMatrix( alpha.data_handle(), triangular_matrix.data_handle(), ncv, ncv, stream); - // print_device_vector("triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); - int blockSize = 256; int numBlocks = (ncv + blockSize - 1) / blockSize; kernel_triangular_populate <<>>(triangular_matrix.data_handle(), beta.data_handle(), ncv); - // if beta_k is not None: - // beta_k = cupy.asnumpy(beta_k) - // t[k, :k] = beta_k - // t[:k, k] = beta_k - if (beta_k) { int threadsPerBlock = 256; int blocksPerGrid = (k + threadsPerBlock - 1) / threadsPerBlock; @@ -1625,16 +1548,10 @@ void cupy_solve_ritz( triangular_matrix.data_handle(), beta_k.value().data_handle(), (int)k, ncv); } - // print_device_vector("ritz triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); - auto triangular_matrix_view = raft::make_device_matrix_view( triangular_matrix.data_handle(), ncv, ncv); - // print_device_vector("triangular", triangular_matrix.data_handle(), ncv*ncv, std::cout); - - // raft::linalg::eig_jacobi(handle, triangular_matrix_view, eigenvectors, eigenvalues, zero); - // Lapack::steqr() raft::linalg::eig_dc(handle, triangular_matrix_view, eigenvectors, eigenvalues); } @@ -1655,37 +1572,10 @@ void cupy_aux(raft::resources const& handle, auto stream = resource::get_cuda_stream(handle); int n = A->nrows_; - // std::cout << std::fixed << std::setprecision(7); // Set precision to 10 decimal places - // int i = 0; - // int b = 0; - // int one = 1; - // int zero = 0; - // int mone = -1; - - // auto V_const = raft::make_device_matrix_view(V.data_handle(), ncv, n); - - // v[...] 
= V[i_start] raft::copy(v.data_handle(), &(V(start_idx, 0)), n, stream); - // auto mp = raft::make_device_vector(handle, 1); - // raft::matrix::fill(handle, mp.view(), start_idx); - // auto mp_const = raft::make_device_vector_view(mp.data_handle(), 1); - // auto v_view = raft::make_device_matrix_view(v.data_handle(), 1, n); - - // raft::matrix::gather(handle, V_const, mp_const, v_view); std::cout << start_idx << " " << end_idx << std::endl; - // print_device_vector("V", V.data_handle(), n*ncv, std::cout); - // print_device_vector("u", u.data_handle(), n, std::cout); - // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); - // print_device_vector("beta", beta.data_handle(), ncv, std::cout); - // print_device_vector("v", v.data_handle(), n, std::cout); - // print_device_vector("uu", v.data_handle(), n, std::cout); - // print_device_vector("vv", v.data_handle(), n, std::cout); - - // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; @@ -1702,11 +1592,6 @@ void cupy_aux(raft::resources const& handle, raft::sparse::detail::cusparsecreatednvec(&cusparse_v, n, v.data_handle()); raft::sparse::detail::cusparsecreatednvec(&cusparse_u, n, u.data_handle()); - // if (start_idx == 0) { - // print_device_vector("spmv v", v.data_handle(), n, std::cout); - // print_device_vector("spmv u", u.data_handle(), n, std::cout); - // } - value_type_t one = 1; value_type_t zero = 0; size_t bufferSize; @@ -1735,59 +1620,12 @@ void cupy_aux(raft::resources const& handle, cusparse_spmv_buffer.data_handle(), stream); - // if (start_idx == 0 && i == 0) { - // print_device_vector("u spmv", u.data_handle(), n, std::cout); - // } - // print_device_vector("u spmv", u.data_handle(), n, std::cout); - - // # Call dotc: alpha[i] = v.conj().T @ u - // _cublas.setPointerMode( - // cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - // try: - // dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1, - // alpha.data.ptr + i * alpha.itemsize) - // finally: - // _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - // conjugate is only for complex numbers - // we should only have real numbers - - // FIXME: loop index auto alpha_i = raft::make_device_scalar_view(&alpha(0, i)); auto v_vector = raft::make_device_vector_view(v.data_handle(), n); auto u_vector = raft::make_device_vector_view(u.data_handle(), n); raft::linalg::dot(handle, v_vector, u_vector, alpha_i); - // print_device_vector("alpha[i]", &alpha(0, i), 1, std::cout); - - // # Orthogonalize: u = u - alpha[i] * v - beta[i - 1] * V[i - 1] - // vv.fill(0) - // b[...] 
= beta[i - 1] # cast from real to complex - // print("vv", vv) - // print("b", b, "beta[i-1]", beta[i-1]) - // _cublas.setPointerMode( - // cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - // try: - // axpy(cublas_handle, n, - // alpha.data.ptr + i * alpha.itemsize, - // v.data.ptr, 1, vv.data.ptr, 1) - // axpy(cublas_handle, n, - // b.data.ptr, - // V[i - 1].data.ptr, 1, vv.data.ptr, 1) - // finally: - // _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - // axpy(cublas_handle, n, - // mone.ctypes.data, - // vv.data.ptr, 1, u.data.ptr, 1) - // FIXME: beta(0, i-1) raft::matrix::fill(handle, vv, zero); - // raft::device_scalar_view beta_view = make_device_scalar_view(&beta(0, 0)); - // value_type_t scalar; - // raft::copy(&scalar, &beta(0, 0), 1, stream); - // const value_type_t scalar_const = scalar; - - // auto b = raft::make_device_scalar(handle, scalar_const); auto cublas_h = resource::get_cublas_handle(handle); @@ -1795,60 +1633,18 @@ void cupy_aux(raft::resources const& handle, value_type_t b = 0; value_type_t mone = -1; - // FIXME: alpha(0, i) raft::copy(&b, &beta(0, (i - 1 + ncv) % ncv), 1, stream); raft::copy(&alpha_i_host, &(alpha(0, i)), 1, stream); - // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); - raft::linalg::detail::cublasaxpy( cublas_h, n, &alpha_i_host, v.data_handle(), 1, vv.data_handle(), 1, stream); - // // FIXME: &V(i, 0) - // std::cout << "got here axpy" << std::endl; + raft::linalg::detail::cublasaxpy( cublas_h, n, &b, &V((i - 1 + ncv) % ncv, 0), 1, vv.data_handle(), 1, stream); - // std::cout << "got here axpy" << std::endl; raft::linalg::detail::cublasaxpy( cublas_h, n, &mone, vv.data_handle(), 1, u.data_handle(), 1, stream); - // if (start_idx == 7 && i == 7) { - // print_device_vector("axpy u", u.data_handle(), n, std::cout); - // } - - // std::cout << "got here axpy" << std::endl; - - // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); - // std::cout << "got here axpy" << std::endl; - - // print_device_vector("ortho u", u.data_handle(), n, std::cout); - - // # Reorthogonalize: u -= V @ (V.conj().T @ u) - // gemv(cublas_handle, _cublas.CUBLAS_OP_C, - // n, i + 1, - // one.ctypes.data, V.data.ptr, n, - // u.data.ptr, 1, - // zero.ctypes.data, uu.data.ptr, 1) - // gemv(cublas_handle, _cublas.CUBLAS_OP_N, - // n, i + 1, - // mone.ctypes.data, V.data.ptr, n, - // uu.data.ptr, 1, - // one.ctypes.data, u.data.ptr, 1) - // alpha[i] += uu[i] - - // Are we transposing because of row-major to column-major since gemv requires column-major - - // ncv * n * - // std::cout << i << std::endl; - // if (start_idx == 7 && end_idx == 38 && i == 7) { - // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); - // print_device_vector("ortho u", u.data_handle(), n, std::cout); - // } - // if (start_idx == 0 && end_idx == 38 && i == 0) { - // print_device_vector("ortho V", V.data_handle(), n*ncv, std::cout); - // print_device_vector("ortho u", u.data_handle(), n, std::cout); - // } - raft::linalg::detail::cublasgemv(cublas_h, CUBLAS_OP_T, n, @@ -1880,26 +1676,11 @@ void cupy_aux(raft::resources const& handle, auto uu_i = raft::make_device_scalar_view(&uu(0, i)); raft::linalg::add(handle, make_const_mdspan(alpha_i), make_const_mdspan(uu_i), alpha_i); - // flush alpha kernel_clamp_down<<<1, 1>>>(alpha_i.data_handle(), static_cast(1e-9)); - // print_device_vector("gemv uu[i]", &uu(0, i), 1, std::cout); - // print_device_vector("gemv alpha[i]", &alpha(0, i), 1, std::cout); - // print_device_vector("gemv u", 
u.data_handle(), n, std::cout);
+
-  // FIXME: pointer mode for alpha beta?
-  // # Call nrm2
-  // _cublas.setPointerMode(
-  //     cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
-  // try:
-  //     nrm2(cublas_handle, n, u.data.ptr, 1,
-  //          beta.data.ptr + i * beta.itemsize)
-  // finally:
-  //     _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)
-
   raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_DEVICE, stream);
   raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &beta(0, i), stream);
-  // print_device_vector("nrm2 beta[i]", &beta(0, i), 1, std::cout);
+
   raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream);

   int blockSize = 256;
@@ -1910,38 +1691,13 @@
   kernel_clamp_down<<<1, 1>>>(&beta(0, i), static_cast<value_type_t>(1e-6));

-  // FIXME:
-  // # Break here as the normalization below touches V[i+1]
-  // if i >= i_end - 1:
-  //     break
   if (i >= end_idx - 1) { break; }

-  // FIXME: custom cuda kernel vs raft primitives?
-  // # Normalize
-  // _kernel_normalize(u, beta, i, n, v, V)
-
-  // _kernel_normalize = cupy.ElementwiseKernel(
-  //     'T u, raw S beta, int32 j, int32 n', 'T v, raw T V',
-  //     'v = u / beta[j]; V[i + (j+1) * n] = v;', 'cupy_eigsh_normalize')
-
-  // v = u / beta[j];
-  // V[i + (j+1) * n] = v;
   int threadsPerBlock = 256;
   int blocksPerGrid   = (n + threadsPerBlock - 1) / threadsPerBlock;
   kernel_normalize<<<blocksPerGrid, threadsPerBlock>>>(
     u.data_handle(), beta.data_handle(), i, n, v.data_handle(), V.data_handle(), n);
-
-  // print_device_vector("kernel normalize v", v.data_handle(), n, std::cout);
-  // print_device_vector("kernel normalize V", V.data_handle(), n*ncv, std::cout);
-
-  // raft::linalg::unary_op(handle,u, v,
-  //                        [device_scalar = beta(0, i)] __device__(auto y) {
-  //                          return y / *device_scalar;
-  //                        });
-
-  // raft::copy(&V(i + (j+1) * n, 0), v.data_handle(), n, stream);
  }
}
@@ -1957,27 +1713,18 @@ int cupy_smallest(raft::resources const& handle,
   value_type_t* v0,
   uint64_t seed)
 {
-  // std::cout << "hello cupy smallest " << A->nrows_ << " " << A->ncols_ << " " << A->nnz_ <<
-  // std::endl;
-
-  int n = A->nrows_;
-  int ncv = restartIter;
-  // raft::print_device_vector("hello cupy v0 init", v0, n, std::cout);
+  int n   = A->nrows_;
+  int ncv = restartIter;
   auto stream = resource::get_cuda_stream(handle);

   std::cout << std::fixed << std::setprecision(7);  // Set precision to 7 decimal places

-  // print_device_vector("v0_cpp", v0, n, std::cout);
-
-  // u = v0
-  // V[0] = v0 / cublas.nrm2(v0)
   raft::device_matrix V =
     raft::make_device_matrix(handle, ncv, n);
   raft::device_matrix_view V_0_view =
     raft::make_device_matrix_view(V.data_handle(), 1, n);  // First Row V[0]
   raft::device_matrix_view v0_view =
     raft::make_device_matrix_view(v0, 1, n);
-  // raft::linalg::row_normalize(handle, v0_view, V_0_view, raft::linalg::L2Norm);
   raft::device_matrix u =
     raft::make_device_matrix(handle, 1, n);
@@ -1986,14 +1733,11 @@ int cupy_smallest(raft::resources const& handle,
   auto cublas_h = resource::get_cublas_handle(handle);
   value_type_t v0nrm = 0;
   raft::linalg::detail::cublasnrm2(cublas_h, n, v0_view.data_handle(), 1, &v0nrm, stream);
-  // std::cout << "v0nrm " << v0nrm << std::endl;
   raft::device_scalar v0nrm_scalar = raft::make_device_scalar(handle, v0nrm);

   raft::device_vector_view v0_vector_const =
     raft::make_device_vector_view(v0, n);
-  // raft::device_vector_view v0_vector =
-  //   raft::make_device_vector_view(v0, n);

   raft::linalg::unary_op(
     handle,
     v0_vector_const,
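     // Normalizes the initial vector, V[0] = v0 / ||v0||_2, matching the CuPy
     // reference step `V[0] = v0 / cublas.nrm2(v0)`: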
V_0_view, [device_scalar = v0nrm_scalar.data_handle()] __device__(auto y) { return y / *device_scalar; }); - // print_device_vector("V[0]", V_0_view.data_handle(), n, std::cout); - - // print_device_vector("V[0]", V.data_handle(), n, std::cout); - raft::device_matrix alpha = raft::make_device_matrix(handle, 1, ncv); raft::device_matrix beta = @@ -2013,25 +1753,6 @@ int cupy_smallest(raft::resources const& handle, raft::matrix::fill(handle, alpha.view(), zero); raft::matrix::fill(handle, beta.view(), zero); - // start allocating for cupy_lanczos_fast() - - // cusparse_handle = None - // if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - // cusparse_handle = device.get_cusparse_handle() - // spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - // spmv_alpha = numpy.array(1.0, A.dtype) - // spmv_beta = numpy.array(0.0, A.dtype) - // spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - // spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - // v = cupy.empty((n,), dtype=A.dtype) - // uu = cupy.empty((ncv,), dtype=A.dtype) - // vv = cupy.empty((n,), dtype=A.dtype) - // b = cupy.empty((), dtype=A.dtype) - // one = numpy.array(1.0, dtype=A.dtype) - // zero = numpy.array(0.0, dtype=A.dtype) - // mone = numpy.array(-1.0, dtype=A.dtype) - raft::device_matrix v = raft::make_device_matrix(handle, 1, n); raft::device_matrix aux_uu = @@ -2039,7 +1760,6 @@ int cupy_smallest(raft::resources const& handle, raft::device_matrix vv = raft::make_device_matrix(handle, 1, n); - // cupy_aux(A, V.view(), u_view, alpha.view(), beta.view()); cupy_aux(handle, A, V.view(), @@ -2053,18 +1773,6 @@ int cupy_smallest(raft::resources const& handle, aux_uu.view(), vv.view()); - // # Lanczos iteration - // lanczos(a, V, u, alpha, beta, 0, ncv) - - // iter = ncv - // w, s = _eigsh_solve_ritz(alpha, beta, None, k, which) - // x = V.T @ s - - // # Compute residual - // beta_k = beta[-1] * s[-1, :] - // res = cublas.nrm2(beta_k) - - // uu = cupy.empty((k,), dtype=a.dtype) auto eigenvectors = raft::make_device_matrix(handle, ncv, ncv); auto eigenvalues = raft::make_device_vector(handle, ncv); @@ -2078,11 +1786,6 @@ int cupy_smallest(raft::resources const& handle, ncv, eigenvectors.view(), eigenvalues.view()); - // print_device_vector("V", V.data_handle(), n*ncv, std::cout); - // print_device_vector("u", u.data_handle(), n, std::cout); - // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); - // print_device_vector("beta", beta.data_handle(), ncv, std::cout); - // print_device_vector("v", v.data_handle(), n, std::cout); auto eigenvectors_k = raft::make_device_matrix_view( eigenvectors.data_handle(), ncv, nEigVecs); @@ -2090,13 +1793,6 @@ int cupy_smallest(raft::resources const& handle, raft::make_device_vector_view( eigenvalues.data_handle(), nEigVecs); - // print_device_vector("eigenvectors", eigenvectors_k.data_handle(), nEigVecs*ncv, std::cout); - // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); - - // x = V.T @ s - - // ncv*n x ncv*nEigVecs - auto ritz_eigenvectors = raft::make_device_matrix_view( eigVecs_dev, n, nEigVecs); @@ -2105,15 +1801,6 @@ int cupy_smallest(raft::resources const& handle, raft::linalg::gemm( handle, V_T, eigenvectors_k, ritz_eigenvectors); - // print_device_vector("ritz_eigenvectors", ritz_eigenvectors.data_handle(), n*nEigVecs, - // std::cout); - - // # Compute residual - // beta_k = beta[-1] * s[-1, :] - // res = cublas.nrm2(beta_k) - - // FIXME: raft::linalg::map_offset() - // Define grid and block sizes int blockSize = 256; // 
Number of threads per block int numBlocks = (nEigVecs + blockSize - 1) / blockSize; @@ -2121,110 +1808,34 @@ int cupy_smallest(raft::resources const& handle, kernel_get_last_row<<>>( eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); - // print_device_vector("s_new[-1, :]", s.data_handle(), nEigVecs, std::cout); - auto beta_k = raft::make_device_vector(handle, nEigVecs); raft::matrix::fill(handle, beta_k.view(), zero); - // auto s = raft::make_device_vector_view(&eigenvectors_k(ncv - 1, 0), - // nEigVecs); auto beta_scalar = raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); - // auto cublas_h = resource::get_cublas_handle(handle); value_type_t res = 0; raft::linalg::detail::cublasnrm2(cublas_h, nEigVecs, beta_k.data_handle(), 1, &res, stream); - // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); - // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); - - // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); - // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); - // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); - // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); std::cout << "res " << res << std::endl; - // uu = cupy.empty((k,), dtype=a.dtype) - - // while res > tol and iter < maxiter: - // # Setup for thick-restart - // beta[:k] = 0 - // alpha[:k] = w - // V[:k] = x.T - - // # u -= u.T @ V[:k].conj().T @ V[:k] - // cublas.gemv(_cublas.CUBLAS_OP_C, 1, V[:k].T, u, 0, uu) - // cublas.gemv(_cublas.CUBLAS_OP_N, -1, V[:k].T, uu, 1, u) - // V[k] = u / cublas.nrm2(u) - - // u[...] = a @ V[k] - // cublas.dotc(V[k], u, out=alpha[k]) - // u -= alpha[k] * V[k] - // u -= V[:k].T @ beta_k - // cublas.nrm2(u, out=beta[k]) - // V[k+1] = u / beta[k] - - // # Lanczos iteration - // lanczos(a, V, u, alpha, beta, k + 1, ncv) - - // iter += ncv - k - // w, s = _eigsh_solve_ritz(alpha, beta, beta_k, k, which) - // x = V.T @ s - - // # Compute residual - // beta_k = beta[-1] * s[-1, :] - // res = cublas.nrm2(beta_k) - - // print(iter, w, res) - auto uu = raft::make_device_matrix(handle, 0, nEigVecs); int iter = ncv; while (res > tol && iter < maxIter) { - // setup for thick-restart - // beta[:k] = 0 auto beta_view = raft::make_device_matrix_view( beta.data_handle(), 1, nEigVecs); raft::matrix::fill(handle, beta_view, zero); - // alpha[:k] = w - raft::copy(alpha.data_handle(), eigenvalues_k.data_handle(), nEigVecs, stream); - // V[:k] = x.T - // auto x_T = raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); auto V_k_view = - // raft::make_device_matrix_view(V.data_handle(), nEigVecs, n); + raft::copy(alpha.data_handle(), eigenvalues_k.data_handle(), nEigVecs, stream); - // auto x_T = raft::make_device_matrix(handle, nEigVecs, n); auto x_T = raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); - // raft::linalg::transpose(handle, ritz_eigenvectors, x_T.view()); raft::copy(V.data_handle(), x_T.data_handle(), nEigVecs * n, stream); - // print_device_vector("V[:k]", V.data_handle(), nEigVecs * n, std::cout); - - // FIXME: manually multiply eigenvectors by -1 to see if that fixes anything - // 0, 1, 2, 5 - // auto V_zero = raft::make_device_vector_view(V.data_handle(), n); - // auto V_one = raft::make_device_vector_view(&((V.view()(1, 0))), n); - // auto V_two = raft::make_device_vector_view(&((V.view()(2, 
0))), n); - // auto V_five = raft::make_device_vector_view(&((V.view()(5, 0))), n); - - // auto minusone = raft::make_host_scalar(-1); - - // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_zero), V_zero, minusone.view()); - // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_one), V_one, minusone.view()); - // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_two), V_two, minusone.view()); - // raft::linalg::multiply_scalar(handle, make_const_mdspan(V_five), V_five, minusone.view()); - value_type_t one = 1; value_type_t mone = -1; - // # u -= u.T @ V[:k].conj().T @ V[:k] - // cublas.gemv(_cublas.CUBLAS_OP_C, 1, V[:k].T, u, 0, uu) - // cublas.gemv(_cublas.CUBLAS_OP_N, -1, V[:k].T, uu, 1, u) - // V[k] = u / cublas.nrm2(u) - - // FIXME: uu is too small? raft::linalg::detail::cublasgemv(cublas_h, CUBLAS_OP_T, @@ -2254,20 +1865,15 @@ int cupy_smallest(raft::resources const& handle, 1, stream); - // V[k] = u / cublas.nrm2(u) raft::device_matrix_view V_0_view = - raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); // Row V[k] - // auto cublas_h = resource::get_cublas_handle(handle); + raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); value_type_t unrm = 0; raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &unrm, stream); - // std::cout << "v0nrm " << v0nrm << std::endl; raft::device_scalar unrm_scalar = raft::make_device_scalar(handle, unrm); raft::device_vector_view u_vector_const = raft::make_device_vector_view(u.data_handle(), n); - // raft::device_vector_view u_vector = - // raft::make_device_vector_view(u.data_handle(), n); raft::linalg::unary_op(handle, u_vector_const, @@ -2276,18 +1882,6 @@ int cupy_smallest(raft::resources const& handle, return y / *device_scalar; }); - // raft::device_matrix_view V_0_view = - // raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); // Row V[k] - // raft::linalg::row_normalize(handle, raft::make_const_mdspan(u.view()), V_0_view, - // raft::linalg::L2Norm); print_device_vector("V[k]", V_0_view.data_handle(), n, std::cout); - - // u[...] 
= a @ V[k] - // cublas.dotc(V[k], u, out=alpha[k]) - // u -= alpha[k] * V[k] - // u -= V[:k].T @ beta_k - // cublas.nrm2(u, out=beta[k]) - // V[k+1] = u / beta[k] - auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; raft::sparse::detail::cusparsecreatecsr(&cusparse_A, @@ -2303,7 +1897,6 @@ int cupy_smallest(raft::resources const& handle, raft::sparse::detail::cusparsecreatednvec(&cusparse_v, n, V_0_view.data_handle()); raft::sparse::detail::cusparsecreatednvec(&cusparse_u, n, u.data_handle()); - // value_type_t one = 1; value_type_t zero = 0; size_t bufferSize; raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, @@ -2329,13 +1922,6 @@ int cupy_smallest(raft::resources const& handle, cusparse_spmv_buffer.data_handle(), stream); - // print_device_vector("u spmv", u.data_handle(), n, std::cout); - - // auto alpha_i = raft::make_device_scalar_view(&alpha(0, i)); - // auto v_vector = raft::make_device_vector_view(v.data_handle(), n); - // auto u_vector = raft::make_device_vector_view(u.data_handle(), n); - // raft::linalg::dot(handle, v_vector, u_vector, alpha_i); - auto alpha_k = raft::make_device_scalar_view(&((alpha.view())(0, nEigVecs))); auto V_0_view_vector = raft::make_device_vector_view(V_0_view.data_handle(), n); @@ -2343,47 +1929,20 @@ int cupy_smallest(raft::resources const& handle, raft::linalg::dot(handle, V_0_view_vector, u_view_vector, alpha_k); - // raft::linalg::multiply_scalar(handle, V_0_view, u.view()); - // raft::linalg::unary_op(handle, V_0_view, u.view(), [device_scalar = alpha_k.data_handle()] - // __device__(auto y) { - // return y * (*device_scalar); - // }); int threadsPerBlock = 256; int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; - // kernel_subtract_and_scale<<>>(u.data_handle(), a, a, n); kernel_subtract_and_scale<<>>( u.data_handle(), V_0_view.data_handle(), alpha_k.data_handle(), n); - // print_device_vector("u subtract and scale", u.data_handle(), n, std::cout); - - // u -= V[:k].T @ beta_k - // cublas.nrm2(u, out=beta[k]) - // V[k+1] = u / beta[k] - auto temp = raft::make_device_vector(handle, n); - // print_device_vector("temp", temp.data_handle(), n, std::cout); - auto V_k = raft::make_device_matrix_view( V.data_handle(), nEigVecs, n); auto V_k_T = raft::make_device_matrix(handle, n, nEigVecs); - // print_device_vector("V_k", V_k.data_handle(), nEigVecs*n, std::cout); - raft::linalg::transpose(handle, V_k, V_k_T.view()); - // print_device_vector("V_k_T", V_k_T.data_handle(), nEigVecs*n, std::cout); - - // (n, nEigVecs) x (nEigVecs) - - // auto beta_k_vector = raft::make_device_vector_view(beta_k.data_handle(), nEigVecs); - - // raft::linalg::gemv(handle, - // make_const_mdspan(V_k_T.view()), beta_k_vector, temp.view()); - - // FIXME: build small test case for cublasgemv value_type_t three = 3; value_type_t two = 2; @@ -2395,19 +1954,6 @@ int cupy_smallest(raft::resources const& handle, auto out = raft::make_device_vector(handle, 3); raft::copy(M_dev.data_handle(), M.data(), 6, stream); raft::copy(vec_dev.data_handle(), vec.data(), 2, stream); - // raft::linalg::detail::cublasgemv(cublas_h, - // CUBLAS_OP_N, - // three, - // two, - // &myone, - // M_dev.data_handle(), - // two, - // vec_dev.data_handle(), - // 1, - // &myzero, - // out.data_handle(), - // 1, - // stream); raft::linalg::detail::cublasgemv(cublas_h, CUBLAS_OP_N, @@ -2423,8 +1969,6 @@ int cupy_smallest(raft::resources const& handle, 1, stream); - // print_device_vector("out", out.data_handle(), 3, std::cout); - 
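   // The gemv below realizes the step `u -= V[:k].T @ beta_k` documented in
   // the CuPy reference: it computes temp = V[:k].T @ beta_k through the
   // explicitly transposed V_k_T, and the kernel_subtract_and_scale launch
   // that follows subtracts it from u (the scale factor passed in is one).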
raft::linalg::detail::cublasgemv(cublas_h, CUBLAS_OP_N, n, @@ -2443,16 +1987,9 @@ int cupy_smallest(raft::resources const& handle, kernel_subtract_and_scale<<>>( u.data_handle(), temp.data_handle(), one_scalar.data_handle(), n); - // print_device_vector("V", V.data_handle(), nEigVecs*n, std::cout); - // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); - - // print_device_vector("temp", temp.data_handle(), n, std::cout); - // print_device_vector("u subtract and scale", u.data_handle(), n, std::cout); - raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_DEVICE, stream); raft::linalg::detail::cublasnrm2( cublas_h, n, u.data_handle(), 1, &((beta.view())(0, nEigVecs)), stream); - // print_device_vector("nrm2 u", &((beta.view())(0, nEigVecs)), 1, std::cout); raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream); auto V_kplus1 = raft::make_device_vector_view(&(V.view()(nEigVecs + 1, 0)), n); @@ -2465,27 +2002,6 @@ int cupy_smallest(raft::resources const& handle, return y / *device_scalar; }); - // print_device_vector("V[k+1]", V_kplus1.data_handle(), n, std::cout); - - // # Lanczos iteration - // lanczos(a, V, u, alpha, beta, k + 1, ncv) - - // iter += ncv - k - // w, s = _eigsh_solve_ritz(alpha, beta, beta_k, k, which) - // x = V.T @ s - - // # Compute residual - // beta_k = beta[-1] * s[-1, :] - // res = cublas.nrm2(beta_k) - // print_device_vector("before alpha.view", alpha.data_handle(), ncv, std::cout); - // print_device_vector("before beta.view", beta.data_handle(), ncv, std::cout); - - // print_device_vector("V", V.data_handle(), n*ncv, std::cout); - // print_device_vector("u", u.data_handle(), n, std::cout); - // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); - // print_device_vector("beta", beta.data_handle(), ncv, std::cout); - // print_device_vector("v", v.data_handle(), n, std::cout); - cupy_aux(handle, A, V.view(), @@ -2498,9 +2014,6 @@ int cupy_smallest(raft::resources const& handle, v.view(), aux_uu.view(), vv.view()); - // print_device_vector("alpha", alpha.data_handle(), ncv, std::cout); - // print_device_vector("beta", beta.data_handle(), ncv, std::cout); - // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); iter += ncv - nEigVecs; cupy_solve_ritz(handle, alpha.view(), @@ -2513,16 +2026,6 @@ int cupy_smallest(raft::resources const& handle, eigenvalues.view()); auto eigenvectors_k = raft::make_device_matrix_view( eigenvectors.data_handle(), ncv, nEigVecs); - // raft::device_vector_view eigenvalues_k = - // raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); - - // print_device_vector("eigenvectors", eigenvectors_k.data_handle(), nEigVecs*ncv, std::cout); - // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); - - // x = V.T @ s - - // ncv*n x ncv*nEigVecs auto ritz_eigenvectors = raft::make_device_matrix_view( eigVecs_dev, n, nEigVecs); @@ -2532,15 +2035,6 @@ int cupy_smallest(raft::resources const& handle, raft::linalg::gemm( handle, V_T, eigenvectors_k, ritz_eigenvectors); - // print_device_vector("ritz_eigenvectors", ritz_eigenvectors.data_handle(), n*nEigVecs, - // std::cout); - - // # Compute residual - // beta_k = beta[-1] * s[-1, :] - // res = cublas.nrm2(beta_k) - - // FIXME: raft::linalg::map_offset() - // Define grid and block sizes int blockSize = 256; // Number of threads per block int numBlocks = (nEigVecs + blockSize - 1) / blockSize; @@ -2548,35 +2042,20 @@ int cupy_smallest(raft::resources 
const& handle, kernel_get_last_row<<>>( eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); - // print_device_vector("eigenvectors", eigenvectors.data_handle(), ncv*ncv, std::cout); - // print_device_vector("s_new[-1, :]", s.data_handle(), nEigVecs, std::cout); - - // auto beta_k = raft::make_device_vector(handle, nEigVecs); raft::matrix::fill(handle, beta_k.view(), zero); - // auto s = raft::make_device_vector_view(&eigenvectors_k(ncv - 1, 0), - // nEigVecs); + auto beta_scalar = raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); - // print_device_vector("beta[-1]", beta_scalar.data_handle(), 1, std::cout); raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); auto cublas_h = resource::get_cublas_handle(handle); - // value_type_t res = 0; - // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); - // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); - // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); - raft::linalg::detail::cublasnrm2(cublas_h, nEigVecs, beta_k.data_handle(), 1, &res, stream); - // print_device_vector("s[-1, :]", s.data_handle(), nEigVecs, std::cout); - // print_device_vector("beta[-1]", &((beta.view())(0, ncv - 1)), 1, std::cout); + raft::linalg::detail::cublasnrm2(cublas_h, nEigVecs, beta_k.data_handle(), 1, &res, stream); - // print_device_vector("beta_k", beta_k.data_handle(), nEigVecs, std::cout); std::cout << "res " << res << " " << iter << std::endl; - // break; } - // print_device_vector("eigenvalues", eigenvalues_k.data_handle(), nEigVecs, std::cout); raft::copy(eigVals_dev, eigenvalues_k.data_handle(), nEigVecs, stream); raft::copy(eigVecs_dev, ritz_eigenvectors.data_handle(), n * nEigVecs, stream); From 2be393a19033b0da0b60375b297b1a41e65143ab Mon Sep 17 00:00:00 2001 From: aamijar Date: Thu, 22 Aug 2024 01:38:28 +0000 Subject: [PATCH 07/23] update gtest rng seed --- cpp/test/sparse/solver/lanczos.cu | 75 +++++++++++++------------------ 1 file changed, 32 insertions(+), 43 deletions(-) diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/test/sparse/solver/lanczos.cu index fdff55ed12..287c330449 100644 --- a/cpp/test/sparse/solver/lanczos.cu +++ b/cpp/test/sparse/solver/lanczos.cu @@ -127,8 +127,10 @@ class rmat_lanczos_tests raft::device_vector out_dst = raft::make_device_vector(handle, n_edges); + raft::random::RngState rng1{params.seed}; + raft::random::rmat_rectangular_gen(handle, - rng, + rng1, make_const_mdspan(theta.view()), out.view(), out_src.view(), @@ -404,48 +406,34 @@ const std::vector> inputsd = { {-2.0369630, -1.7673520}}}; const std::vector> rmat_inputsf = { - {50, - 100, - 10000, - 0, - 0, - 1e-9, - 42, - 12, - 12, - 1, - // {-122.53275 , -74.009415, -59.70774 , -54.678654, -49.700565, - // -34.015884, -32.097626, -31.29491 , -30.33276 , -22.899527, - // -20.49083 , -20.243006, -19.26677 , -18.43743 , -17.671614, - // -17.00962 , -16.72859 , -15.812017, -15.744598, -15.438096, - // -15.030397, -14.721282, -14.146572, -13.959946, -13.640783, - // -13.475106, -13.200468, -12.769644, -12.630838, -12.570684, - // -12.290903, -12.042329, -11.678847, -11.563247, -11.185609, - // -10.919437, -10.785621, -10.566719, -10.202344, -10.014745, - // -9.602258, -9.511378, -9.268343, -8.876679, -8.805339, - // -8.670585, -8.471375, -8.391085, -8.197367, -8.014922} - {-122.53162, -74.046684, -59.7358, -54.70629, -49.729855, -33.997437, -32.072914, -31.306896, - -30.339314, -22.891956, -20.453482, -20.1798, -19.275993, -18.436245, 
-17.653976, -17.008162, - -16.73615, -15.846376, -15.681458, -15.459055, -15.053776, -14.731912, -14.132045, -13.951516, - -13.603188, -13.477833, -13.191872, -12.771893, -12.634565, -12.592889, -12.280662, -12.032298, - -11.667132, -11.555687, -11.176134, -10.875261, -10.79131, -10.538387, -10.242246, -9.957915, - -9.627112, -9.508455, -9.254543, -8.852059, -8.82837, -8.712086, -8.445032, -8.385991, - -8.194637, -7.9835095}}}; + {50, 100, 10000, 0, 0, 1e-9, 42, 12, 12, 1, {-122.526794, -74.00686, -59.698284, -54.68617, + -49.686813, -34.02644, -32.130703, -31.26906, + -30.32097, -22.946098, -20.497862, -20.23817, + -19.269697, -18.42496, -17.675667, -17.013401, + -16.734581, -15.820215, -15.73925, -15.448187, + -15.044634, -14.692028, -14.127425, -13.967386, + -13.6237755, -13.469393, -13.181225, -12.777589, + -12.623185, -12.55508, -12.2874565, -12.053391, + -11.677346, -11.558279, -11.163732, -10.922034, + -10.7936945, -10.558049, -10.205776, -10.005316, + -9.559181, -9.491834, -9.242631, -8.883637, + -8.765364, -8.688508, -8.458255, -8.385196, + -8.217982, -8.0442095}}}; const std::vector> rmat_inputsd = { - {50, 100, 10000, 0, 0, 1e-9, 42, 12, 12, 1, {-122.53275, -74.009415, -59.70774, -54.678654, - -49.700565, -34.015884, -32.097626, -31.29491, - -30.33276, -22.899527, -20.49083, -20.243006, - -19.26677, -18.43743, -17.671614, -17.00962, - -16.72859, -15.812017, -15.744598, -15.438096, - -15.030397, -14.721282, -14.146572, -13.959946, - -13.640783, -13.475106, -13.200468, -12.769644, - -12.630838, -12.570684, -12.290903, -12.042329, - -11.678847, -11.563247, -11.185609, -10.919437, - -10.785621, -10.566719, -10.202344, -10.014745, - -9.602258, -9.511378, -9.268343, -8.876679, - -8.805339, -8.670585, -8.471375, -8.391085, - -8.197367, -8.014922}}}; + {50, 100, 10000, 0, 0, 1e-9, 42, 12, 12, 1, {-122.526794, -74.00686, -59.698284, -54.68617, + -49.686813, -34.02644, -32.130703, -31.26906, + -30.32097, -22.946098, -20.497862, -20.23817, + -19.269697, -18.42496, -17.675667, -17.013401, + -16.734581, -15.820215, -15.73925, -15.448187, + -15.044634, -14.692028, -14.127425, -13.967386, + -13.6237755, -13.469393, -13.181225, -12.777589, + -12.623185, -12.55508, -12.2874565, -12.053391, + -11.677346, -11.558279, -11.163732, -10.922034, + -10.7936945, -10.558049, -10.205776, -10.005316, + -9.559181, -9.491834, -9.242631, -8.883637, + -8.765364, -8.688508, -8.458255, -8.385196, + -8.217982, -8.0442095}}}; using LanczosTestF = lanczos_tests; TEST_P(LanczosTestF, Result) { Run(); } @@ -519,8 +507,9 @@ TEST_P(DummyLanczosTest, Result) raft::device_vector out_dst = raft::make_device_vector(handle, n_edges); + raft::random::RngState rng1{42}; raft::random::rmat_rectangular_gen(handle, - rng, + rng1, make_const_mdspan(theta.view()), out.view(), out_src.view(), @@ -591,7 +580,7 @@ INSTANTIATE_TEST_CASE_P(LanczosTests, LanczosTestD, ::testing::ValuesIn(inputsd) INSTANTIATE_TEST_CASE_P(LanczosTests, RmatLanczosTestF, ::testing::ValuesIn(rmat_inputsf)); // INSTANTIATE_TEST_CASE_P(LanczosTests, RmatLanczosTestD, ::testing::ValuesIn(rmat_inputsd)); -INSTANTIATE_TEST_CASE_P(LanczosTests, DummyLanczosTest, ::testing::ValuesIn(inputsf)); +// INSTANTIATE_TEST_CASE_P(LanczosTests, DummyLanczosTest, ::testing::ValuesIn(inputsf)); } // namespace sparse } // namespace raft From 4f37b5c3b9ac61ebb04220763279c3aef62475e1 Mon Sep 17 00:00:00 2001 From: aamijar Date: Thu, 22 Aug 2024 02:15:39 +0000 Subject: [PATCH 08/23] update gtest edge case --- cpp/test/sparse/solver/lanczos.cu | 61 ++++++++++++++++++++----------- 1 
file changed, 39 insertions(+), 22 deletions(-) diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/test/sparse/solver/lanczos.cu index 287c330449..3256fc1caa 100644 --- a/cpp/test/sparse/solver/lanczos.cu +++ b/cpp/test/sparse/solver/lanczos.cu @@ -420,20 +420,37 @@ const std::vector> rmat_inputsf = { -8.765364, -8.688508, -8.458255, -8.385196, -8.217982, -8.0442095}}}; -const std::vector> rmat_inputsd = { - {50, 100, 10000, 0, 0, 1e-9, 42, 12, 12, 1, {-122.526794, -74.00686, -59.698284, -54.68617, - -49.686813, -34.02644, -32.130703, -31.26906, - -30.32097, -22.946098, -20.497862, -20.23817, - -19.269697, -18.42496, -17.675667, -17.013401, - -16.734581, -15.820215, -15.73925, -15.448187, - -15.044634, -14.692028, -14.127425, -13.967386, - -13.6237755, -13.469393, -13.181225, -12.777589, - -12.623185, -12.55508, -12.2874565, -12.053391, - -11.677346, -11.558279, -11.163732, -10.922034, - -10.7936945, -10.558049, -10.205776, -10.005316, - -9.559181, -9.491834, -9.242631, -8.883637, - -8.765364, -8.688508, -8.458255, -8.385196, - -8.217982, -8.0442095}}}; +const std::vector> rmat_inputs_edge_case = { + {100, + 300, + 10000, + 0, + 0, + 1e-9, + 42, + 12, + 12, + 1, + {-1.22526756e+02, -7.40069504e+01, -5.96983109e+01, -5.46862068e+01, -4.96868439e+01, + -3.40264435e+01, -3.21306839e+01, -3.12690392e+01, -3.03210258e+01, -2.29461250e+01, + -2.04978676e+01, -2.02381744e+01, -1.92697086e+01, -1.84249725e+01, -1.76756725e+01, + -1.70134144e+01, -1.67345791e+01, -1.58202209e+01, -1.57392349e+01, -1.54481869e+01, + -1.50446243e+01, -1.46920280e+01, -1.41274376e+01, -1.39673843e+01, -1.36237764e+01, + -1.34693928e+01, -1.31812143e+01, -1.27775812e+01, -1.26231880e+01, -1.25550766e+01, + -1.22874584e+01, -1.20533924e+01, -1.16773510e+01, -1.15582829e+01, -1.11637363e+01, + -1.09220333e+01, -1.07936945e+01, -1.05580463e+01, -1.02057772e+01, -1.00053129e+01, + -9.55917740e+00, -9.49183655e+00, -9.24262238e+00, -8.88363647e+00, -8.76536846e+00, + -8.68850899e+00, -8.45825481e+00, -8.38520622e+00, -8.21798038e+00, -8.04420948e+00, + -7.90373087e+00, -7.83332729e+00, -7.54670286e+00, -7.50262451e+00, -7.36070538e+00, + -7.06634855e+00, -6.89205170e+00, -6.64973640e+00, -6.46234751e+00, -5.98167992e+00, + -5.67716694e+00, -5.48805237e+00, -5.00374651e+00, -4.64848948e+00, -6.70900226e-06, + -5.04503123e-06, -1.94547101e-06, -5.66026663e-13, -5.23560958e-13, -4.79860509e-13, + -4.48999019e-13, -4.35402040e-13, -4.26073429e-13, -4.10326368e-13, -4.09151066e-13, + -3.81928457e-13, -3.71661062e-13, -3.63793847e-13, -3.51424022e-13, -3.45496228e-13, + -3.36190629e-13, -3.27994251e-13, -3.12900720e-13, -3.00004786e-13, -2.84064601e-13, + -2.75522199e-13, -2.58613199e-13, -2.47531948e-13, -2.35822267e-13, -2.04967106e-13, + -1.92008627e-13, -1.72746230e-13, -1.51118782e-13, -1.39004232e-13, -1.23819764e-13, + -1.02513457e-13, -8.25850415e-14, -6.00154488e-14, -4.85406359e-14, -3.43267861e-14}}}; using LanczosTestF = lanczos_tests; TEST_P(LanczosTestF, Result) { Run(); } @@ -444,11 +461,8 @@ TEST_P(LanczosTestD, Result) { Run(); } using RmatLanczosTestF = rmat_lanczos_tests; TEST_P(RmatLanczosTestF, Result) { Run(); } -// using RmatLanczosTestD = rmat_lanczos_tests; -// TEST_P(RmatLanczosTestD, Result) -// { -// Run(); -// } +using RmatLanczosTestEdgeCase = rmat_lanczos_tests; +TEST_P(RmatLanczosTestEdgeCase, Result) { Run(); } template void save_vectors(const std::string& filename, @@ -572,15 +586,18 @@ TEST_P(DummyLanczosTest, Result) raft::copy(colsH.data(), symmetric_coo.cols(), symmetric_coo.nnz, stream); 
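   // Note (assumption about raft::copy semantics): these copies are enqueued
   // on `stream`, so a conservative pattern is to synchronize before the host
   // buffers are consumed, for example:
   //   raft::resource::sync_stream(handle);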
raft::copy(valsH.data(), symmetric_coo.vals(), symmetric_coo.nnz, stream); - save_vectors("sparse.bin", rowsH, colsH, valsH); + // This is to inspect the RMAT values and save them to a file + // save_vectors("sparse.bin", rowsH, colsH, valsH); } INSTANTIATE_TEST_CASE_P(LanczosTests, LanczosTestF, ::testing::ValuesIn(inputsf)); INSTANTIATE_TEST_CASE_P(LanczosTests, LanczosTestD, ::testing::ValuesIn(inputsd)); INSTANTIATE_TEST_CASE_P(LanczosTests, RmatLanczosTestF, ::testing::ValuesIn(rmat_inputsf)); -// INSTANTIATE_TEST_CASE_P(LanczosTests, RmatLanczosTestD, ::testing::ValuesIn(rmat_inputsd)); +INSTANTIATE_TEST_CASE_P(LanczosTests, + RmatLanczosTestEdgeCase, + ::testing::ValuesIn(rmat_inputs_edge_case)); -// INSTANTIATE_TEST_CASE_P(LanczosTests, DummyLanczosTest, ::testing::ValuesIn(inputsf)); +INSTANTIATE_TEST_CASE_P(LanczosTests, DummyLanczosTest, ::testing::ValuesIn(inputsf)); } // namespace sparse } // namespace raft From 79ce3f1bd08f146eed2081161790849cf4987a95 Mon Sep 17 00:00:00 2001 From: aamijar Date: Thu, 22 Aug 2024 05:55:32 +0000 Subject: [PATCH 09/23] update gtest clean --- cpp/test/sparse/solver/lanczos.cu | 382 ++++++++---------------------- 1 file changed, 97 insertions(+), 285 deletions(-) diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/test/sparse/solver/lanczos.cu index 3256fc1caa..90cd3c607a 100644 --- a/cpp/test/sparse/solver/lanczos.cu +++ b/cpp/test/sparse/solver/lanczos.cu @@ -46,10 +46,9 @@ #include #include -namespace raft { -namespace sparse { +namespace raft::sparse { -template +template struct lanczos_inputs { int n_components; int restartiter; @@ -58,13 +57,13 @@ struct lanczos_inputs { float conv_eps; float tol; uint64_t seed; - std::vector rows; // indptr - std::vector cols; // indices - std::vector vals; // data - std::vector expected_eigenvalues; + std::vector rows; // indptr + std::vector cols; // indices + std::vector vals; // data + std::vector expected_eigenvalues; }; -template +template struct rmat_lanczos_inputs { int n_components; int restartiter; @@ -76,22 +75,18 @@ struct rmat_lanczos_inputs { int r_scale; int c_scale; float sparsity; - std::vector expected_eigenvalues; + std::vector expected_eigenvalues; }; -template -class dummy_lanczos_tests - : public ::testing::TestWithParam> {}; - -template +template class rmat_lanczos_tests - : public ::testing::TestWithParam> { + : public ::testing::TestWithParam> { public: rmat_lanczos_tests() - : params(::testing::TestWithParam>::GetParam()), + : params(::testing::TestWithParam>::GetParam()), stream(resource::get_cuda_stream(handle)), rng(params.seed), - expected_eigenvalues(raft::make_device_vector( + expected_eigenvalues(raft::make_device_vector( handle, params.n_components)), r_scale(params.r_scale), c_scale(params.c_scale), @@ -116,32 +111,32 @@ class rmat_lanczos_tests uint64_t n_nodes = 1 << std::max(r_scale, c_scale); uint64_t theta_len = std::max(r_scale, c_scale) * 4; - raft::device_vector theta = - raft::make_device_vector(handle, theta_len); - raft::random::uniform(handle, rng, theta.view(), 0, 1); + raft::device_vector theta = + raft::make_device_vector(handle, theta_len); + raft::random::uniform(handle, rng, theta.view(), 0, 1); - raft::device_matrix out = - raft::make_device_matrix(handle, n_edges * 2, 2); - raft::device_vector out_src = - raft::make_device_vector(handle, n_edges); - raft::device_vector out_dst = - raft::make_device_vector(handle, n_edges); + raft::device_matrix out = + raft::make_device_matrix(handle, n_edges * 2, 2); + raft::device_vector out_src = + 
raft::make_device_vector(handle, n_edges); + raft::device_vector out_dst = + raft::make_device_vector(handle, n_edges); raft::random::RngState rng1{params.seed}; - raft::random::rmat_rectangular_gen(handle, - rng1, - make_const_mdspan(theta.view()), - out.view(), - out_src.view(), - out_dst.view(), - r_scale, - c_scale); + raft::random::rmat_rectangular_gen(handle, + rng1, + make_const_mdspan(theta.view()), + out.view(), + out_src.view(), + out_dst.view(), + r_scale, + c_scale); - raft::device_vector out_data = - raft::make_device_vector(handle, n_edges); - raft::matrix::fill(handle, out_data.view(), 1.0); - raft::sparse::COO coo(stream); + raft::device_vector out_data = + raft::make_device_vector(handle, n_edges); + raft::matrix::fill(handle, out_data.view(), 1.0); + raft::sparse::COO coo(stream); raft::sparse::op::coo_sort(n_nodes, n_nodes, @@ -150,22 +145,22 @@ class rmat_lanczos_tests out_dst.data_handle(), out_data.data_handle(), stream); - raft::sparse::op::max_duplicates(handle, - coo, - out_src.data_handle(), - out_dst.data_handle(), - out_data.data_handle(), - n_edges, - n_nodes, - n_nodes); - - raft::sparse::COO symmetric_coo(stream); + raft::sparse::op::max_duplicates(handle, + coo, + out_src.data_handle(), + out_dst.data_handle(), + out_data.data_handle(), + n_edges, + n_nodes, + n_nodes); + + raft::sparse::COO symmetric_coo(stream); raft::sparse::linalg::symmetrize( handle, coo.rows(), coo.cols(), coo.vals(), coo.n_rows, coo.n_cols, coo.nnz, symmetric_coo); - raft::device_vector row_indices = - raft::make_device_vector(handle, - symmetric_coo.n_rows + 1); + raft::device_vector row_indices = + raft::make_device_vector(handle, + symmetric_coo.n_rows + 1); raft::sparse::convert::sorted_coo_to_csr(symmetric_coo.rows(), symmetric_coo.nnz, row_indices.data_handle(), @@ -174,76 +169,68 @@ class rmat_lanczos_tests int n_components = params.n_components; - raft::device_vector v0 = - raft::make_device_vector(handle, symmetric_coo.n_rows); - - raft::random::uniform(handle, rng, v0.view(), 0, 1); - // raft::spectral::matrix::sparse_matrix_t const csr_m{handle, - // row_indices.data_handle(), symmetric_coo.cols(), symmetric_coo.vals(), symmetric_coo.n_rows, - // symmetric_coo.nnz}; raft::spectral::eigen_solver_config_t - // cfg{n_components, params.maxiter, params.restartiter, params.tol, false, rng.seed}; - std::tuple stats; - // raft::spectral::lanczos_solver_t eigen_solver{cfg}; - - raft::device_vector eigenvalues = - raft::make_device_vector(handle, n_components); - raft::device_matrix eigenvectors = - raft::make_device_matrix( + raft::device_vector v0 = + raft::make_device_vector(handle, symmetric_coo.n_rows); + + raft::random::uniform(handle, rng, v0.view(), 0, 1); + std::tuple stats; + + raft::device_vector eigenvalues = + raft::make_device_vector(handle, n_components); + raft::device_matrix eigenvectors = + raft::make_device_matrix( handle, symmetric_coo.n_rows, n_components); - raft::spectral::matrix::sparse_matrix_t const csr_m{ + raft::spectral::matrix::sparse_matrix_t const csr_m{ handle, row_indices.data_handle(), symmetric_coo.cols(), symmetric_coo.vals(), symmetric_coo.n_rows, symmetric_coo.nnz}; - raft::sparse::solver::lanczos_solver_config config{ + raft::sparse::solver::lanczos_solver_config config{ n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; std::get<0>(stats) = - raft::sparse::solver::lanczos_compute_smallest_eigenvectors( + raft::sparse::solver::lanczos_compute_smallest_eigenvectors( handle, csr_m, config, v0.view(), eigenvalues.view(), 
eigenvectors.view()); - // std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, csr_m, - // eigenvalues.data_handle(), eigenvectors.data_handle(), v0.data_handle()); - - ASSERT_TRUE(raft::devArrMatch(eigenvalues.data_handle(), - expected_eigenvalues.data_handle(), - n_components, - raft::CompareApprox(1e-5), - stream)); + ASSERT_TRUE(raft::devArrMatch(eigenvalues.data_handle(), + expected_eigenvalues.data_handle(), + n_components, + raft::CompareApprox(1e-5), + stream)); } protected: - rmat_lanczos_inputs params; + rmat_lanczos_inputs params; raft::resources handle; cudaStream_t stream; raft::random::RngState rng; int r_scale; int c_scale; float sparsity; - raft::device_vector expected_eigenvalues; + raft::device_vector expected_eigenvalues; }; -template -class lanczos_tests : public ::testing::TestWithParam> { +template +class lanczos_tests : public ::testing::TestWithParam> { public: lanczos_tests() - : params(::testing::TestWithParam>::GetParam()), + : params(::testing::TestWithParam>::GetParam()), stream(resource::get_cuda_stream(handle)), n(params.rows.size() - 1), nnz(params.vals.size()), rng(params.seed), - rows(raft::make_device_vector(handle, n + 1)), - cols(raft::make_device_vector(handle, nnz)), - vals(raft::make_device_vector(handle, nnz)), - v0(raft::make_device_vector(handle, n)), - eigenvalues(raft::make_device_vector( + rows(raft::make_device_vector(handle, n + 1)), + cols(raft::make_device_vector(handle, nnz)), + vals(raft::make_device_vector(handle, nnz)), + v0(raft::make_device_vector(handle, n)), + eigenvalues(raft::make_device_vector( handle, params.n_components)), - eigenvectors(raft::make_device_matrix( + eigenvectors(raft::make_device_matrix( handle, n, params.n_components)), - expected_eigenvalues(raft::make_device_vector( - handle, params.n_components)) + expected_eigenvalues( + raft::make_device_vector(handle, params.n_components)) { } @@ -263,46 +250,38 @@ class lanczos_tests : public ::testing::TestWithParam(handle, rng, v0.view(), 0, 1); - // raft::spectral::matrix::sparse_matrix_t const csr_m{handle, - // rows.data_handle(), cols.data_handle(), vals.data_handle(), n, nnz}; - // raft::spectral::eigen_solver_config_t cfg{params.n_components, - // params.maxiter, params.restartiter, params.tol, false, params.seed}; - std::tuple stats; - // raft::spectral::lanczos_solver_t eigen_solver{cfg}; - - raft::spectral::matrix::sparse_matrix_t const csr_m{ + raft::random::uniform(handle, rng, v0.view(), 0, 1); + std::tuple stats; + + raft::spectral::matrix::sparse_matrix_t const csr_m{ handle, rows.data_handle(), cols.data_handle(), vals.data_handle(), n, nnz}; - raft::sparse::solver::lanczos_solver_config config{ + raft::sparse::solver::lanczos_solver_config config{ params.n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; std::get<0>(stats) = - raft::sparse::solver::lanczos_compute_smallest_eigenvectors( + raft::sparse::solver::lanczos_compute_smallest_eigenvectors( handle, csr_m, config, v0.view(), eigenvalues.view(), eigenvectors.view()); - // std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, csr_m, - // eigenvalues.data_handle(), eigenvectors.data_handle(), v0.data_handle()); - - ASSERT_TRUE(raft::devArrMatch(eigenvalues.data_handle(), - expected_eigenvalues.data_handle(), - params.n_components, - raft::CompareApprox(1e-5), - stream)); + ASSERT_TRUE(raft::devArrMatch(eigenvalues.data_handle(), + expected_eigenvalues.data_handle(), + params.n_components, + raft::CompareApprox(1e-5), + stream)); } protected: - 
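// Fixture state: the parameterized inputs from GetParam() plus device-side
// buffers for the CSR matrix, the initial Lanczos vector v0, and the
// computed and expected eigenpairs declared below.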
lanczos_inputs params; + lanczos_inputs params; raft::resources handle; cudaStream_t stream; int n; int nnz; raft::random::RngState rng; - raft::device_vector rows; - raft::device_vector cols; - raft::device_vector vals; - raft::device_vector v0; - raft::device_vector eigenvalues; - raft::device_matrix eigenvectors; - raft::device_vector expected_eigenvalues; + raft::device_vector rows; + raft::device_vector cols; + raft::device_vector vals; + raft::device_vector v0; + raft::device_vector eigenvalues; + raft::device_matrix eigenvectors; + raft::device_vector expected_eigenvalues; }; const std::vector> inputsf = { @@ -420,38 +399,6 @@ const std::vector> rmat_inputsf = { -8.765364, -8.688508, -8.458255, -8.385196, -8.217982, -8.0442095}}}; -const std::vector> rmat_inputs_edge_case = { - {100, - 300, - 10000, - 0, - 0, - 1e-9, - 42, - 12, - 12, - 1, - {-1.22526756e+02, -7.40069504e+01, -5.96983109e+01, -5.46862068e+01, -4.96868439e+01, - -3.40264435e+01, -3.21306839e+01, -3.12690392e+01, -3.03210258e+01, -2.29461250e+01, - -2.04978676e+01, -2.02381744e+01, -1.92697086e+01, -1.84249725e+01, -1.76756725e+01, - -1.70134144e+01, -1.67345791e+01, -1.58202209e+01, -1.57392349e+01, -1.54481869e+01, - -1.50446243e+01, -1.46920280e+01, -1.41274376e+01, -1.39673843e+01, -1.36237764e+01, - -1.34693928e+01, -1.31812143e+01, -1.27775812e+01, -1.26231880e+01, -1.25550766e+01, - -1.22874584e+01, -1.20533924e+01, -1.16773510e+01, -1.15582829e+01, -1.11637363e+01, - -1.09220333e+01, -1.07936945e+01, -1.05580463e+01, -1.02057772e+01, -1.00053129e+01, - -9.55917740e+00, -9.49183655e+00, -9.24262238e+00, -8.88363647e+00, -8.76536846e+00, - -8.68850899e+00, -8.45825481e+00, -8.38520622e+00, -8.21798038e+00, -8.04420948e+00, - -7.90373087e+00, -7.83332729e+00, -7.54670286e+00, -7.50262451e+00, -7.36070538e+00, - -7.06634855e+00, -6.89205170e+00, -6.64973640e+00, -6.46234751e+00, -5.98167992e+00, - -5.67716694e+00, -5.48805237e+00, -5.00374651e+00, -4.64848948e+00, -6.70900226e-06, - -5.04503123e-06, -1.94547101e-06, -5.66026663e-13, -5.23560958e-13, -4.79860509e-13, - -4.48999019e-13, -4.35402040e-13, -4.26073429e-13, -4.10326368e-13, -4.09151066e-13, - -3.81928457e-13, -3.71661062e-13, -3.63793847e-13, -3.51424022e-13, -3.45496228e-13, - -3.36190629e-13, -3.27994251e-13, -3.12900720e-13, -3.00004786e-13, -2.84064601e-13, - -2.75522199e-13, -2.58613199e-13, -2.47531948e-13, -2.35822267e-13, -2.04967106e-13, - -1.92008627e-13, -1.72746230e-13, -1.51118782e-13, -1.39004232e-13, -1.23819764e-13, - -1.02513457e-13, -8.25850415e-14, -6.00154488e-14, -4.85406359e-14, -3.43267861e-14}}}; - using LanczosTestF = lanczos_tests; TEST_P(LanczosTestF, Result) { Run(); } @@ -461,143 +408,8 @@ TEST_P(LanczosTestD, Result) { Run(); } using RmatLanczosTestF = rmat_lanczos_tests; TEST_P(RmatLanczosTestF, Result) { Run(); } -using RmatLanczosTestEdgeCase = rmat_lanczos_tests; -TEST_P(RmatLanczosTestEdgeCase, Result) { Run(); } - -template -void save_vectors(const std::string& filename, - const std::vector& rows, - const std::vector& cols, - const std::vector& vals) -{ - std::ofstream out(filename, std::ios::binary); - - // Save the size of each vector - size_t size_rows = rows.size(); - size_t size_cols = cols.size(); - size_t size_vals = vals.size(); - - out.write(reinterpret_cast(&size_rows), sizeof(size_rows)); - out.write(reinterpret_cast(&size_cols), sizeof(size_cols)); - out.write(reinterpret_cast(&size_vals), sizeof(size_vals)); - - // Save the vectors - out.write(reinterpret_cast(rows.data()), size_rows * 
sizeof(index_type)); - out.write(reinterpret_cast(cols.data()), size_cols * sizeof(index_type)); - out.write(reinterpret_cast(vals.data()), size_vals * sizeof(value_type)); - - out.close(); -} - -using DummyLanczosTest = dummy_lanczos_tests; -TEST_P(DummyLanczosTest, Result) -{ - raft::resources handle; - cudaStream_t stream = resource::get_cuda_stream(handle); - raft::random::RngState rng(42); - - using index_type = int; - using value_type = float; - int r_scale = 12; - int c_scale = 12; - float sparsity = 1; - uint64_t n_edges = sparsity * ((long long)(1 << r_scale) * (long long)(1 << c_scale)); - uint64_t n_nodes = 1 << std::max(r_scale, c_scale); - uint64_t theta_len = std::max(r_scale, c_scale) * 4; - - std::cout << "n_edges" << n_edges << std::endl; - std::cout << "n_nodes" << n_nodes << std::endl; - - raft::device_vector theta = - raft::make_device_vector(handle, theta_len); - raft::random::uniform(handle, rng, theta.view(), 0, 1); - // print_device_vector("theta", theta.data_handle(), theta_len, std::cout); - - raft::device_matrix out = - raft::make_device_matrix(handle, n_edges * 2, 2); - - raft::device_vector out_src = - raft::make_device_vector(handle, n_edges); - raft::device_vector out_dst = - raft::make_device_vector(handle, n_edges); - - raft::random::RngState rng1{42}; - raft::random::rmat_rectangular_gen(handle, - rng1, - make_const_mdspan(theta.view()), - out.view(), - out_src.view(), - out_dst.view(), - r_scale, - c_scale); - - // print_device_vector("out", out.data_handle(), n_edges*2, std::cout); - // print_device_vector("out_src", out_src.data_handle(), n_edges, std::cout); - // print_device_vector("out_dst", out_dst.data_handle(), n_edges, std::cout); - - raft::device_vector out_data = - raft::make_device_vector(handle, n_edges); - raft::matrix::fill(handle, out_data.view(), 1.0F); - raft::sparse::COO coo(stream); - - raft::sparse::op::coo_sort(n_nodes, - n_nodes, - n_edges, - out_src.data_handle(), - out_dst.data_handle(), - out_data.data_handle(), - stream); - raft::sparse::op::max_duplicates(handle, - coo, - out_src.data_handle(), - out_dst.data_handle(), - out_data.data_handle(), - n_edges, - n_nodes, - n_nodes); - - // print_device_vector("coo_rows", coo.rows(), coo.nnz, std::cout); - // print_device_vector("coo_cols", coo.cols(), coo.nnz, std::cout); - // print_device_vector("coo_vals", coo.vals(), coo.nnz, std::cout); - - // print_device_vector("csr_row_indices", row_indices.data_handle(), coo.n_rows + 1, std::cout); - - raft::sparse::COO symmetric_coo(stream); - raft::sparse::linalg::symmetrize( - handle, coo.rows(), coo.cols(), coo.vals(), coo.n_rows, coo.n_cols, coo.nnz, symmetric_coo); - - raft::device_vector row_indices = - raft::make_device_vector(handle, - symmetric_coo.n_rows + 1); - raft::sparse::convert::sorted_coo_to_csr(symmetric_coo.rows(), - symmetric_coo.nnz, - row_indices.data_handle(), - symmetric_coo.n_rows + 1, - stream); - - // print_device_vector("sym_coo_rows", symmetric_coo.rows(), symmetric_coo.nnz, std::cout); - // print_device_vector("sym_coo_cols", symmetric_coo.cols(), symmetric_coo.nnz, std::cout); - // print_device_vector("sym_coo_vals", symmetric_coo.vals(), symmetric_coo.nnz, std::cout); - - std::vector rowsH(symmetric_coo.n_rows + 1); - std::vector colsH(symmetric_coo.nnz); - std::vector valsH(symmetric_coo.nnz); - raft::copy(rowsH.data(), row_indices.data_handle(), symmetric_coo.n_rows + 1, stream); - raft::copy(colsH.data(), symmetric_coo.cols(), symmetric_coo.nnz, stream); - raft::copy(valsH.data(), symmetric_coo.vals(), 
symmetric_coo.nnz, stream); - - // This is to inspect the RMAT values and save them to a file - // save_vectors("sparse.bin", rowsH, colsH, valsH); -} - INSTANTIATE_TEST_CASE_P(LanczosTests, LanczosTestF, ::testing::ValuesIn(inputsf)); INSTANTIATE_TEST_CASE_P(LanczosTests, LanczosTestD, ::testing::ValuesIn(inputsd)); INSTANTIATE_TEST_CASE_P(LanczosTests, RmatLanczosTestF, ::testing::ValuesIn(rmat_inputsf)); -INSTANTIATE_TEST_CASE_P(LanczosTests, - RmatLanczosTestEdgeCase, - ::testing::ValuesIn(rmat_inputs_edge_case)); - -INSTANTIATE_TEST_CASE_P(LanczosTests, DummyLanczosTest, ::testing::ValuesIn(inputsf)); -} // namespace sparse -} // namespace raft +} // namespace raft::sparse From ceb1d7a66e7e4711287adb1ef477e63321d73038 Mon Sep 17 00:00:00 2001 From: aamijar Date: Fri, 23 Aug 2024 19:26:30 +0000 Subject: [PATCH 10/23] resolving pr comments --- cpp/include/raft/linalg/detail/norm.cuh | 13 +- cpp/include/raft/linalg/norm.cuh | 7 + .../raft/sparse/solver/detail/lanczos.cuh | 386 +++++++++--------- cpp/include/raft/sparse/solver/lanczos.cuh | 3 +- .../raft/sparse/solver/lanczos_types.hpp | 32 ++ cpp/include/raft_runtime/solver/lanczos.hpp | 2 +- .../raft_runtime/solver/lanczos_solver.cuh | 1 + cpp/test/sparse/solver/lanczos.cu | 1 + python/pylibraft/pylibraft/solver/__init__.py | 2 +- python/pylibraft/pylibraft/solver/lanczos.pyx | 40 +- 10 files changed, 282 insertions(+), 205 deletions(-) create mode 100644 cpp/include/raft/sparse/solver/lanczos_types.hpp diff --git a/cpp/include/raft/linalg/detail/norm.cuh b/cpp/include/raft/linalg/detail/norm.cuh index ed7e360848..793fba81c2 100644 --- a/cpp/include/raft/linalg/detail/norm.cuh +++ b/cpp/include/raft/linalg/detail/norm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
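The hunks that follow add a thin `nrm2` wrapper, in `detail` and in the public `raft::linalg` namespace, around `cublasnrm2`. The detail implementation switches cuBLAS into device pointer mode for the duration of the call, so the result is naturally written through a device-resident scalar. A minimal usage sketch, assuming `handle`, `stream`, and a device vector `u` of length `n` are in scope:

  auto result = raft::make_device_scalar<float>(handle, 0);
  // Euclidean norm of u, written to device memory
  raft::linalg::nrm2(handle, n, u.data_handle(), 1, result.data_handle(), stream);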
@@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include #include @@ -138,6 +140,15 @@ void colNormCaller(Type* dots, }; } +template +void nrm2( + raft::resources const& handle, int n, const T* x, int incx, T* result, cudaStream_t stream) +{ + cublasHandle_t cublas_h = resource::get_cublas_handle(handle); + detail::cublas_device_pointer_mode pmode(cublas_h); + detail::cublasnrm2(cublas_h, n, x, incx, result, stream); +} + }; // end namespace detail }; // end namespace linalg }; // end namespace raft diff --git a/cpp/include/raft/linalg/norm.cuh b/cpp/include/raft/linalg/norm.cuh index 97a5d6135d..cae8d9c3a8 100644 --- a/cpp/include/raft/linalg/norm.cuh +++ b/cpp/include/raft/linalg/norm.cuh @@ -150,6 +150,13 @@ void norm(raft::resources const& handle, } } +template +void nrm2( + raft::resources const& handle, int n, const T* x, int incx, T* result, cudaStream_t stream) +{ + detail::nrm2(handle, n, x, incx, result, stream); +} + /** @} */ }; // end namespace linalg diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index ade2571a3d..0930abd8df 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -19,48 +19,43 @@ // for cmath: #define _USE_MATH_DEFINES -#include -#include -#include -#include -#include -#include -#include -#include -// #include - #include #include #include #include +#include #include +#include +#include +#include +#include +#include #include +#include #include +#include +#include +#include #include #include #include #include -#include -// #include - -#include -#include -#include -#include #include #include #include #include +#include #include - -// #include -// #include -#include -#include #include +#include #include #include #include +#include +#include +#include +#include +#include #include @@ -1515,11 +1510,11 @@ RAFT_KERNEL kernel_clamp_down_vector(T* vec, T threshold, int size) } template -void cupy_solve_ritz( +void lanczos_solve_ritz( raft::resources const& handle, raft::device_matrix_view alpha, raft::device_matrix_view beta, - std::optional> beta_k, + std::optional> beta_k, index_type_t k, int which, int ncv, @@ -1556,18 +1551,18 @@ void cupy_solve_ritz( } template -void cupy_aux(raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, - raft::device_matrix_view V, - raft::device_matrix_view u, - raft::device_matrix_view alpha, - raft::device_matrix_view beta, - int start_idx, - int end_idx, - int ncv, - raft::device_matrix_view v, - raft::device_matrix_view uu, - raft::device_matrix_view vv) +void lanczos_aux(raft::resources const& handle, + spectral::matrix::sparse_matrix_t const* A, + raft::device_matrix_view V, + raft::device_matrix_view u, + raft::device_matrix_view alpha, + raft::device_matrix_view beta, + int start_idx, + int end_idx, + int ncv, + raft::device_matrix_view v, + raft::device_matrix_view uu, + raft::device_matrix_view vv) { auto stream = resource::get_cuda_stream(handle); @@ -1607,7 +1602,6 @@ void cupy_aux(raft::resources const& handle, stream); auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); - // LOOP for (int i = start_idx; i < end_idx; i++) { raft::sparse::detail::cusparsespmv(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -1636,52 +1630,44 @@ void cupy_aux(raft::resources const& handle, raft::copy(&b, &beta(0, (i - 1 + ncv) % ncv), 1, stream); raft::copy(&alpha_i_host, &(alpha(0, i)), 1, stream); - raft::linalg::detail::cublasaxpy( - cublas_h, n, 
&alpha_i_host, v.data_handle(), 1, vv.data_handle(), 1, stream); - - raft::linalg::detail::cublasaxpy( - cublas_h, n, &b, &V((i - 1 + ncv) % ncv, 0), 1, vv.data_handle(), 1, stream); - - raft::linalg::detail::cublasaxpy( - cublas_h, n, &mone, vv.data_handle(), 1, u.data_handle(), 1, stream); - - raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_T, - n, - i + 1, - &one, - V.data_handle(), - n, - u.data_handle(), - 1, - &zero, - uu.data_handle(), - 1, - stream); - - raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_N, - n, - i + 1, - &mone, - V.data_handle(), - n, - uu.data_handle(), - 1, - &one, - u.data_handle(), - 1, - stream); + raft::linalg::axpy(handle, n, &alpha_i_host, v.data_handle(), 1, vv.data_handle(), 1, stream); + raft::linalg::axpy(handle, n, &b, &V((i - 1 + ncv) % ncv, 0), 1, vv.data_handle(), 1, stream); + raft::linalg::axpy(handle, n, &mone, vv.data_handle(), 1, u.data_handle(), 1, stream); + + raft::linalg::gemv(handle, + CUBLAS_OP_T, + n, + i + 1, + &one, + V.data_handle(), + n, + u.data_handle(), + 1, + &zero, + uu.data_handle(), + 1, + stream); + + raft::linalg::gemv(handle, + CUBLAS_OP_N, + n, + i + 1, + &mone, + V.data_handle(), + n, + uu.data_handle(), + 1, + &one, + u.data_handle(), + 1, + stream); auto uu_i = raft::make_device_scalar_view(&uu(0, i)); raft::linalg::add(handle, make_const_mdspan(alpha_i), make_const_mdspan(uu_i), alpha_i); kernel_clamp_down<<<1, 1>>>(alpha_i.data_handle(), static_cast(1e-9)); - raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_DEVICE, stream); - raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &beta(0, i), stream); - - raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream); + raft::linalg::nrm2(handle, n, u.data_handle(), 1, &beta(0, i), stream); int blockSize = 256; int numBlocks = (n + blockSize - 1) / blockSize; @@ -1702,16 +1688,16 @@ void cupy_aux(raft::resources const& handle, } template -int cupy_smallest(raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, - int nEigVecs, - int maxIter, - int restartIter, - value_type_t tol, - value_type_t* eigVals_dev, - value_type_t* eigVecs_dev, - value_type_t* v0, - uint64_t seed) +int lanczos_smallest(raft::resources const& handle, + spectral::matrix::sparse_matrix_t const* A, + int nEigVecs, + int maxIter, + int restartIter, + value_type_t tol, + value_type_t* eigVals_dev, + value_type_t* eigVecs_dev, + value_type_t* v0, + uint64_t seed) { int n = A->nrows_; int ncv = restartIter; @@ -1732,7 +1718,7 @@ int cupy_smallest(raft::resources const& handle, auto cublas_h = resource::get_cublas_handle(handle); value_type_t v0nrm = 0; - raft::linalg::detail::cublasnrm2(cublas_h, n, v0_view.data_handle(), 1, &v0nrm, stream); + raft::linalg::nrm2(handle, n, v0_view.data_handle(), 1, &v0nrm, stream); raft::device_scalar v0nrm_scalar = raft::make_device_scalar(handle, v0nrm); @@ -1760,38 +1746,37 @@ int cupy_smallest(raft::resources const& handle, raft::device_matrix vv = raft::make_device_matrix(handle, 1, n); - cupy_aux(handle, - A, - V.view(), - u.view(), - alpha.view(), - beta.view(), - 0, - ncv, - ncv, - v.view(), - aux_uu.view(), - vv.view()); + lanczos_aux(handle, + A, + V.view(), + u.view(), + alpha.view(), + beta.view(), + 0, + ncv, + ncv, + v.view(), + aux_uu.view(), + vv.view()); auto eigenvectors = raft::make_device_matrix(handle, ncv, ncv); auto eigenvalues = raft::make_device_vector(handle, ncv); - cupy_solve_ritz(handle, - alpha.view(), - beta.view(), - std::nullopt, - nEigVecs, - 
0, - ncv, - eigenvectors.view(), - eigenvalues.view()); + lanczos_solve_ritz(handle, + alpha.view(), + beta.view(), + std::nullopt, + nEigVecs, + 0, + ncv, + eigenvectors.view(), + eigenvalues.view()); auto eigenvectors_k = raft::make_device_matrix_view( eigenvectors.data_handle(), ncv, nEigVecs); - raft::device_vector_view eigenvalues_k = - raft::make_device_vector_view( - eigenvalues.data_handle(), nEigVecs); + raft::device_vector_view eigenvalues_k = + raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); auto ritz_eigenvectors = raft::make_device_matrix_view( eigVecs_dev, n, nEigVecs); @@ -1805,6 +1790,15 @@ int cupy_smallest(raft::resources const& handle, int numBlocks = (nEigVecs + blockSize - 1) / blockSize; auto s = raft::make_device_vector(handle, nEigVecs); + + // raft::matrix::slice_coordinates coords(eigenvectors_k.extent(0) - 1, 0, + // eigenvectors_k.extent(0), eigenvectors_k.extent(1)); + + // auto S_matrix = raft::make_device_matrix_view(s.data_handle(), 1, nEigVecs); + + // raft::matrix::slice(handle, make_const_mdspan(eigenvectors_k), s, coords); + kernel_get_last_row<<>>( eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); @@ -1816,7 +1810,7 @@ int cupy_smallest(raft::resources const& handle, raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); value_type_t res = 0; - raft::linalg::detail::cublasnrm2(cublas_h, nEigVecs, beta_k.data_handle(), 1, &res, stream); + raft::linalg::nrm2(handle, nEigVecs, beta_k.data_handle(), 1, &res, stream); std::cout << "res " << res << std::endl; @@ -1837,6 +1831,7 @@ int cupy_smallest(raft::resources const& handle, value_type_t one = 1; value_type_t mone = -1; + // Using raft::linalg::gemv leads to Reason=7:CUBLAS_STATUS_INVALID_VALUE raft::linalg::detail::cublasgemv(cublas_h, CUBLAS_OP_T, nEigVecs, @@ -1865,15 +1860,13 @@ int cupy_smallest(raft::resources const& handle, 1, stream); - raft::device_matrix_view V_0_view = - raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); + auto V_0_view = raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); value_type_t unrm = 0; - raft::linalg::detail::cublasnrm2(cublas_h, n, u.data_handle(), 1, &unrm, stream); + raft::linalg::nrm2(handle, n, u.data_handle(), 1, &unrm, stream); raft::device_scalar unrm_scalar = raft::make_device_scalar(handle, unrm); - raft::device_vector_view u_vector_const = - raft::make_device_vector_view(u.data_handle(), n); + auto u_vector_const = raft::make_device_vector_view(u.data_handle(), n); raft::linalg::unary_op(handle, u_vector_const, @@ -1955,42 +1948,40 @@ int cupy_smallest(raft::resources const& handle, raft::copy(M_dev.data_handle(), M.data(), 6, stream); raft::copy(vec_dev.data_handle(), vec.data(), 2, stream); - raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_N, - three, - two, - &one, - M_dev.data_handle(), - three, - vec_dev.data_handle(), - 1, - &zero, - out.data_handle(), - 1, - stream); - - raft::linalg::detail::cublasgemv(cublas_h, - CUBLAS_OP_N, - n, - nEigVecs, - &one, - V_k.data_handle(), - n, - beta_k.data_handle(), - 1, - &zero, - temp.data_handle(), - 1, - stream); + raft::linalg::gemv(handle, + CUBLAS_OP_N, + three, + two, + &one, + M_dev.data_handle(), + three, + vec_dev.data_handle(), + 1, + &zero, + out.data_handle(), + 1, + stream); + + raft::linalg::gemv(handle, + CUBLAS_OP_N, + n, + nEigVecs, + &one, + V_k.data_handle(), + n, + beta_k.data_handle(), + 1, + &zero, + temp.data_handle(), + 1, + stream); auto one_scalar = raft::make_device_scalar(handle, 1); 
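// The gemv above computed temp = V_k * beta_k; the kernel launch below then
// removes that component from the residual (u := u - 1 * temp), keeping u
// orthogonal to the Ritz vectors retained across the thick restart.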
kernel_subtract_and_scale<<>>( u.data_handle(), temp.data_handle(), one_scalar.data_handle(), n); - raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_DEVICE, stream); - raft::linalg::detail::cublasnrm2( - cublas_h, n, u.data_handle(), 1, &((beta.view())(0, nEigVecs)), stream); - raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream); + raft::linalg::nrm2( + handle, n, u.data_handle(), 1, &((beta.view())(0, nEigVecs)), stream); auto V_kplus1 = raft::make_device_vector_view(&(V.view()(nEigVecs + 1, 0)), n); auto u_vector = raft::make_device_vector_view(u.data_handle(), n); @@ -2002,28 +1993,28 @@ int cupy_smallest(raft::resources const& handle, return y / *device_scalar; }); - cupy_aux(handle, - A, - V.view(), - u.view(), - alpha.view(), - beta.view(), - nEigVecs + 1, - ncv, - ncv, - v.view(), - aux_uu.view(), - vv.view()); + lanczos_aux(handle, + A, + V.view(), + u.view(), + alpha.view(), + beta.view(), + nEigVecs + 1, + ncv, + ncv, + v.view(), + aux_uu.view(), + vv.view()); iter += ncv - nEigVecs; - cupy_solve_ritz(handle, - alpha.view(), - beta.view(), - beta_k.view(), - nEigVecs, - 0, - ncv, - eigenvectors.view(), - eigenvalues.view()); + lanczos_solve_ritz(handle, + alpha.view(), + beta.view(), + beta_k.view(), + nEigVecs, + 0, + ncv, + eigenvectors.view(), + eigenvalues.view()); auto eigenvectors_k = raft::make_device_matrix_view( eigenvectors.data_handle(), ncv, nEigVecs); @@ -2049,9 +2040,7 @@ int cupy_smallest(raft::resources const& handle, raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); - auto cublas_h = resource::get_cublas_handle(handle); - - raft::linalg::detail::cublasnrm2(cublas_h, nEigVecs, beta_k.data_handle(), 1, &res, stream); + raft::linalg::nrm2(handle, nEigVecs, beta_k.data_handle(), 1, &res, stream); std::cout << "res " << res << " " << iter << std::endl; } @@ -2062,34 +2051,37 @@ int cupy_smallest(raft::resources const& handle, return 0; } +/** + * @brief Find the smallest eigenpairs using lanczos solver + * @tparam index_type_t the type of data used for indexing. + * @tparam value_type_t the type of data used for weights, distances. + * @param handle the raft handle. + * @param A Matrix. + * @param config lanczos config used to set hyperparameters + * @param v0 Initial lanczos vector + * @param eigenvalues output eigenvalues + * @param eigenvectors output eigenvectors + * @return Zero if successful. Otherwise non-zero. 
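+ *
+ * A usage sketch mirroring the unit test (assumes `handle`, an n x n CSR
+ * matrix `csr_m`, an initialized random vector `v0`, and the config values
+ * are in scope):
+ * @code{.cpp}
+ *   raft::sparse::solver::lanczos_solver_config<float> config{
+ *     n_components, maxiter, restartiter, tol, seed};
+ *   auto eigenvalues = raft::make_device_vector<float, uint32_t, raft::col_major>(
+ *     handle, config.n_components);
+ *   auto eigenvectors = raft::make_device_matrix<float, uint32_t, raft::col_major>(
+ *     handle, n, config.n_components);
+ *   int status = lanczos_compute_smallest_eigenvectors<int, float>(
+ *     handle, csr_m, config, v0.view(), eigenvalues.view(), eigenvectors.view());
+ * @endcode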
+ */ template -struct lanczos_solver_config { - int n_components; - int max_iterations; - int ncv; - ValueTypeT tolerance; - uint64_t seed; -}; - -template auto lanczos_compute_smallest_eigenvectors( raft::resources const& handle, - raft::spectral::matrix::sparse_matrix_t const& A, - lanczos_solver_config const& config, - raft::device_vector_view v0, - raft::device_vector_view eigenvalues, - raft::device_matrix_view eigenvectors) -> int + raft::spectral::matrix::sparse_matrix_t const& A, + lanczos_solver_config const& config, + raft::device_vector_view v0, + raft::device_vector_view eigenvalues, + raft::device_matrix_view eigenvectors) -> int { - return cupy_smallest(handle, - &A, - config.n_components, - config.max_iterations, - config.ncv, - config.tolerance, - eigenvalues.data_handle(), - eigenvectors.data_handle(), - v0.data_handle(), - config.seed); + return lanczos_smallest(handle, + &A, + config.n_components, + config.max_iterations, + config.ncv, + config.tolerance, + eigenvalues.data_handle(), + eigenvectors.data_handle(), + v0.data_handle(), + config.seed); } } // namespace raft::sparse::solver::detail diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index e20a1a9776..5241179543 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -19,6 +19,7 @@ #pragma once #include +#include #include namespace raft::sparse::solver { @@ -27,8 +28,6 @@ namespace raft::sparse::solver { // Eigensolver // ========================================================= -using detail::lanczos_solver_config; - template auto lanczos_compute_smallest_eigenvectors( raft::resources const& handle, diff --git a/cpp/include/raft/sparse/solver/lanczos_types.hpp b/cpp/include/raft/sparse/solver/lanczos_types.hpp new file mode 100644 index 0000000000..7950dbda2a --- /dev/null +++ b/cpp/include/raft/sparse/solver/lanczos_types.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace raft::sparse::solver { + +template +struct lanczos_solver_config { + int n_components; + int max_iterations; + int ncv; + ValueTypeT tolerance; + uint64_t seed; +}; + +} // namespace raft::sparse::solver \ No newline at end of file diff --git a/cpp/include/raft_runtime/solver/lanczos.hpp b/cpp/include/raft_runtime/solver/lanczos.hpp index 21ba0d1627..a08b5be394 100644 --- a/cpp/include/raft_runtime/solver/lanczos.hpp +++ b/cpp/include/raft_runtime/solver/lanczos.hpp @@ -22,7 +22,7 @@ namespace raft::runtime::solver { /** - * @defgroup rmat_runtime RMAT Runtime API + * @defgroup lanczos_runtime Lanczos Runtime API * @{ */ diff --git a/cpp/src/raft_runtime/solver/lanczos_solver.cuh b/cpp/src/raft_runtime/solver/lanczos_solver.cuh index 142ed589bf..f5ca2e3069 100644 --- a/cpp/src/raft_runtime/solver/lanczos_solver.cuh +++ b/cpp/src/raft_runtime/solver/lanczos_solver.cuh @@ -15,6 +15,7 @@ */ #include +#include #include #include #include diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/test/sparse/solver/lanczos.cu index 90cd3c607a..ed157fc5ce 100644 --- a/cpp/test/sparse/solver/lanczos.cu +++ b/cpp/test/sparse/solver/lanczos.cu @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/python/pylibraft/pylibraft/solver/__init__.py b/python/pylibraft/pylibraft/solver/__init__.py index c418651aca..30afe63e7e 100644 --- a/python/pylibraft/pylibraft/solver/__init__.py +++ b/python/pylibraft/pylibraft/solver/__init__.py @@ -15,4 +15,4 @@ from .lanczos import eigsh -__all__ = ["rmat"] +__all__ = ["eigsh"] diff --git a/python/pylibraft/pylibraft/solver/lanczos.pyx b/python/pylibraft/pylibraft/solver/lanczos.pyx index deb511a707..43b3bda221 100644 --- a/python/pylibraft/pylibraft/solver/lanczos.pyx +++ b/python/pylibraft/pylibraft/solver/lanczos.pyx @@ -104,6 +104,43 @@ cdef extern from "raft_runtime/solver/lanczos.hpp" \ @auto_sync_handle def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, tol=0, seed=None, handle=None): + """ + Find ``k`` eigenvalues and eigenvectors of the real symmetric square + matrix or complex Hermitian matrix ``A``. + + Solves ``Ax = wx``, the standard eigenvalue problem for ``w`` eigenvalues + with corresponding eigenvectors ``x``. + + Args: + A (ndarray, spmatrix or LinearOperator): A symmetric square matrix with + dimension ``(n, n)``. ``A`` must be a :class:`cupy.ndarray`, + :class:`cupyx.scipy.sparse.spmatrix` or + :class:`cupyx.scipy.sparse.linalg.LinearOperator`. + k (int): The number of eigenvalues and eigenvectors to compute. Must be + ``1 <= k < n``. + v0 (ndarray): Starting vector for iteration. If ``None``, a random + unit vector is used. + ncv (int): The number of Lanczos vectors generated. Must be + ``k + 1 < ncv < n``. If ``None``, a default value is used. + maxiter (int): Maximum number of Lanczos update iterations. + If ``None``, a default value is used. + tol (float): Tolerance for residuals ``||Ax - wx||``. If ``0``, machine + precision is used. + + Returns: + tuple: + A tuple ``(w, x)``, where ``w`` holds the eigenvalues and + ``x`` the corresponding eigenvectors. + + .. seealso:: + :func:`scipy.sparse.linalg.eigsh` + :func:`cupyx.scipy.sparse.linalg.eigsh` + + .. note:: + This function uses the thick-restart Lanczos method + (https://sdm.lbl.gov/~kewu/ps/trlan.html).
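+
+    Example:
+        A sketch, assuming a symmetric CSR matrix built with
+        :mod:`cupyx.scipy.sparse` is passed as ``A``:
+
+        >>> import cupy as cp
+        >>> import cupyx.scipy.sparse as sp
+        >>> from pylibraft.solver import eigsh
+        >>> n = 1000
+        >>> A = sp.random(n, n, density=0.1, format='csr', dtype=cp.float32)
+        >>> A = ((A + A.T) / 2).tocsr()  # symmetrize
+        >>> w, v = eigsh(A, k=6)
+        >>> w.shape
+        (6,)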
+ + """ if A is None: raise Exception("'A' cannot be None!") @@ -127,7 +164,6 @@ def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, vals_ptr = vals.data if ncv is None: - # ncv = min(max(2 * k, k + 32), n - 1) ncv = min(n, max(2*k + 1, 20)) else: ncv = min(max(ncv, k + 2), n - 1) @@ -157,8 +193,6 @@ def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, handle = handle if handle is not None else Handle() cdef device_resources *h = handle.getHandle() - print(IndexType, ValueType) - if IndexType == np.int32 and ValueType == np.float32: lanczos_solver( deref(h), From 14b7266bcada98b5be7e8ca568d2d2b4fb41e780 Mon Sep 17 00:00:00 2001 From: aamijar Date: Fri, 23 Aug 2024 23:08:27 +0000 Subject: [PATCH 11/23] resolving pr comments --- .../raft/sparse/solver/detail/lanczos.cuh | 31 +++++++++++-------- .../raft/sparse/solver/lanczos_types.hpp | 2 +- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 0930abd8df..1fb31955a8 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -1528,18 +1529,22 @@ void lanczos_solve_ritz( raft::make_device_matrix(handle, ncv, ncv); raft::matrix::fill(handle, triangular_matrix.view(), zero); - raft::matrix::initializeDiagonalMatrix( - alpha.data_handle(), triangular_matrix.data_handle(), ncv, ncv, stream); + raft::device_vector_view alphaVec = + raft::make_device_vector_view(alpha.data_handle(), ncv); + raft::matrix::set_diagonal(handle, alphaVec, triangular_matrix.view()); + + // raft::matrix::initializeDiagonalMatrix( + // alpha.data_handle(), triangular_matrix.data_handle(), ncv, ncv, stream); int blockSize = 256; int numBlocks = (ncv + blockSize - 1) / blockSize; kernel_triangular_populate - <<>>(triangular_matrix.data_handle(), beta.data_handle(), ncv); + <<>>(triangular_matrix.data_handle(), beta.data_handle(), ncv); if (beta_k) { int threadsPerBlock = 256; int blocksPerGrid = (k + threadsPerBlock - 1) / threadsPerBlock; - kernel_triangular_beta_k<<>>( + kernel_triangular_beta_k<<>>( triangular_matrix.data_handle(), beta_k.value().data_handle(), (int)k, ncv); } @@ -1566,7 +1571,7 @@ void lanczos_aux(raft::resources const& handle, { auto stream = resource::get_cuda_stream(handle); - int n = A->nrows_; + index_type_t n = A->nrows_; raft::copy(v.data_handle(), &(V(start_idx, 0)), n, stream); @@ -1665,24 +1670,24 @@ void lanczos_aux(raft::resources const& handle, auto uu_i = raft::make_device_scalar_view(&uu(0, i)); raft::linalg::add(handle, make_const_mdspan(alpha_i), make_const_mdspan(uu_i), alpha_i); - kernel_clamp_down<<<1, 1>>>(alpha_i.data_handle(), static_cast(1e-9)); + kernel_clamp_down<<<1, 1, 0, stream>>>(alpha_i.data_handle(), static_cast(1e-9)); raft::linalg::nrm2(handle, n, u.data_handle(), 1, &beta(0, i), stream); int blockSize = 256; int numBlocks = (n + blockSize - 1) / blockSize; - kernel_clamp_down_vector<<>>( + kernel_clamp_down_vector<<>>( u.data_handle(), static_cast(1e-7), n); - kernel_clamp_down<<<1, 1>>>(&beta(0, i), static_cast(1e-6)); + kernel_clamp_down<<<1, 1, 0, stream>>>(&beta(0, i), static_cast(1e-6)); if (i >= end_idx - 1) { break; } int threadsPerBlock = 256; int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; - kernel_normalize<<>>( + kernel_normalize<<>>( u.data_handle(), beta.data_handle(), i, n, v.data_handle(), V.data_handle(), n); } } @@ -1799,7 +1804,7 
@@ int lanczos_smallest(raft::resources const& handle, // raft::matrix::slice(handle, make_const_mdspan(eigenvectors_k), s, coords); - kernel_get_last_row<<>>( + kernel_get_last_row<<>>( eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); auto beta_k = raft::make_device_vector(handle, nEigVecs); @@ -1924,7 +1929,7 @@ int lanczos_smallest(raft::resources const& handle, int threadsPerBlock = 256; int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; - kernel_subtract_and_scale<<>>( + kernel_subtract_and_scale<<>>( u.data_handle(), V_0_view.data_handle(), alpha_k.data_handle(), n); auto temp = raft::make_device_vector(handle, n); @@ -1977,7 +1982,7 @@ int lanczos_smallest(raft::resources const& handle, stream); auto one_scalar = raft::make_device_scalar(handle, 1); - kernel_subtract_and_scale<<>>( + kernel_subtract_and_scale<<>>( u.data_handle(), temp.data_handle(), one_scalar.data_handle(), n); raft::linalg::nrm2( @@ -2030,7 +2035,7 @@ int lanczos_smallest(raft::resources const& handle, int numBlocks = (nEigVecs + blockSize - 1) / blockSize; auto s = raft::make_device_vector(handle, nEigVecs); - kernel_get_last_row<<>>( + kernel_get_last_row<<>>( eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); raft::matrix::fill(handle, beta_k.view(), zero); diff --git a/cpp/include/raft/sparse/solver/lanczos_types.hpp b/cpp/include/raft/sparse/solver/lanczos_types.hpp index 7950dbda2a..8f2d958925 100644 --- a/cpp/include/raft/sparse/solver/lanczos_types.hpp +++ b/cpp/include/raft/sparse/solver/lanczos_types.hpp @@ -29,4 +29,4 @@ struct lanczos_solver_config { uint64_t seed; }; -} // namespace raft::sparse::solver \ No newline at end of file +} // namespace raft::sparse::solver From c5643524708fd50a190c98ea68ee88e0ef0ce3df Mon Sep 17 00:00:00 2001 From: aamijar Date: Fri, 23 Aug 2024 23:31:47 +0000 Subject: [PATCH 12/23] resolving pr comments --- cpp/include/raft/sparse/solver/detail/lanczos.cuh | 15 +++++---------- cpp/test/sparse/solver/lanczos.cu | 11 ++++------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 1fb31955a8..7a3cd562c9 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -1736,20 +1736,15 @@ int lanczos_smallest(raft::resources const& handle, V_0_view, [device_scalar = v0nrm_scalar.data_handle()] __device__(auto y) { return y / *device_scalar; }); - raft::device_matrix alpha = - raft::make_device_matrix(handle, 1, ncv); - raft::device_matrix beta = - raft::make_device_matrix(handle, 1, ncv); + auto alpha = raft::make_device_matrix(handle, 1, ncv); + auto beta = raft::make_device_matrix(handle, 1, ncv); value_type_t zero = 0; raft::matrix::fill(handle, alpha.view(), zero); raft::matrix::fill(handle, beta.view(), zero); - raft::device_matrix v = - raft::make_device_matrix(handle, 1, n); - raft::device_matrix aux_uu = - raft::make_device_matrix(handle, 1, ncv); - raft::device_matrix vv = - raft::make_device_matrix(handle, 1, n); + auto v = raft::make_device_matrix(handle, 1, n); + auto aux_uu = raft::make_device_matrix(handle, 1, ncv); + auto vv = raft::make_device_matrix(handle, 1, n); lanczos_aux(handle, A, diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/test/sparse/solver/lanczos.cu index ed157fc5ce..367331c436 100644 --- a/cpp/test/sparse/solver/lanczos.cu +++ b/cpp/test/sparse/solver/lanczos.cu @@ -112,16 +112,13 @@ class rmat_lanczos_tests uint64_t n_nodes = 
1 << std::max(r_scale, c_scale); uint64_t theta_len = std::max(r_scale, c_scale) * 4; - raft::device_vector theta = - raft::make_device_vector(handle, theta_len); + auto theta = raft::make_device_vector(handle, theta_len); raft::random::uniform(handle, rng, theta.view(), 0, 1); - raft::device_matrix out = + auto out = raft::make_device_matrix(handle, n_edges * 2, 2); - raft::device_vector out_src = - raft::make_device_vector(handle, n_edges); - raft::device_vector out_dst = - raft::make_device_vector(handle, n_edges); + auto out_src = raft::make_device_vector(handle, n_edges); + auto out_dst = raft::make_device_vector(handle, n_edges); raft::random::RngState rng1{params.seed}; From aba9c4eee7b52f0bd276b87384de2cf85745c429 Mon Sep 17 00:00:00 2001 From: aamijar Date: Sat, 24 Aug 2024 00:17:34 +0000 Subject: [PATCH 13/23] resolving pr comments --- cpp/include/raft/sparse/solver/detail/lanczos.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 7a3cd562c9..7c4503ebc4 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -1860,7 +1860,8 @@ int lanczos_smallest(raft::resources const& handle, 1, stream); - auto V_0_view = raft::make_device_matrix_view(&((V.view())(nEigVecs, 0)), 1, n); + auto V_0_view = + raft::make_device_matrix_view(V.data_handle() + (nEigVecs * n), 1, n); value_type_t unrm = 0; raft::linalg::nrm2(handle, n, u.data_handle(), 1, &unrm, stream); @@ -1915,7 +1916,7 @@ int lanczos_smallest(raft::resources const& handle, cusparse_spmv_buffer.data_handle(), stream); - auto alpha_k = raft::make_device_scalar_view(&((alpha.view())(0, nEigVecs))); + auto alpha_k = raft::make_device_scalar_view(alpha.data_handle() + nEigVecs); auto V_0_view_vector = raft::make_device_vector_view(V_0_view.data_handle(), n); auto u_view_vector = raft::make_device_vector_view(u.data_handle(), n); From ba16aca9cb73eee2fabcaa4a67b9b4879b69c170 Mon Sep 17 00:00:00 2001 From: aamijar Date: Sat, 24 Aug 2024 03:28:41 +0000 Subject: [PATCH 14/23] resolving pr comments --- .../raft/sparse/solver/detail/lanczos.cuh | 44 ++++++------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 7c4503ebc4..980180dee6 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -1441,19 +1441,6 @@ RAFT_KERNEL kernel_subtract_and_scale(T* u, T* vec, T* scalar, int n) if (idx < n) { u[idx] -= (*scalar) * vec[idx]; } } -template -RAFT_KERNEL kernel_get_last_row(const T* M, T* S, int numRows, int numCols) -{ - int col = threadIdx.x + blockIdx.x * blockDim.x; - // Ensure the thread index is within the matrix width - if (col < numCols) { - // Index in the column-major order matrix - int index = (numRows - 1) + col * numRows; - // Copy the value to the last row array - S[col] = M[index]; - } -} - template RAFT_KERNEL kernel_triangular_populate(T* M, const T* beta, int n) { @@ -1786,21 +1773,16 @@ int lanczos_smallest(raft::resources const& handle, raft::linalg::gemm( handle, V_T, eigenvectors_k, ritz_eigenvectors); - int blockSize = 256; // Number of threads per block - int numBlocks = (nEigVecs + blockSize - 1) / blockSize; - auto s = raft::make_device_vector(handle, nEigVecs); - // raft::matrix::slice_coordinates coords(eigenvectors_k.extent(0) - 1, 0, 
- // eigenvectors_k.extent(0), eigenvectors_k.extent(1)); - - // auto S_matrix = raft::make_device_matrix_view(s.data_handle(), 1, nEigVecs); - - // raft::matrix::slice(handle, make_const_mdspan(eigenvectors_k), s, coords); + auto eigenvectors_k_slice = + raft::make_device_matrix_view( + eigenvectors.data_handle(), ncv, nEigVecs); + auto S_matrix = raft::make_device_matrix_view( + s.data_handle(), 1, nEigVecs); - kernel_get_last_row<<>>( - eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); + raft::matrix::slice_coordinates coords(ncv - 1, 0, ncv, nEigVecs); + raft::matrix::slice(handle, make_const_mdspan(eigenvectors_k_slice), S_matrix, coords); auto beta_k = raft::make_device_vector(handle, nEigVecs); raft::matrix::fill(handle, beta_k.view(), zero); @@ -2027,12 +2009,14 @@ int lanczos_smallest(raft::resources const& handle, raft::linalg::gemm( handle, V_T, eigenvectors_k, ritz_eigenvectors); - int blockSize = 256; // Number of threads per block - int numBlocks = (nEigVecs + blockSize - 1) / blockSize; + auto eigenvectors_k_slice = + raft::make_device_matrix_view( + eigenvectors.data_handle(), ncv, nEigVecs); + auto S_matrix = raft::make_device_matrix_view( + s.data_handle(), 1, nEigVecs); - auto s = raft::make_device_vector(handle, nEigVecs); - kernel_get_last_row<<>>( - eigenvectors_k.data_handle(), s.data_handle(), ncv, nEigVecs); + raft::matrix::slice_coordinates coords(ncv - 1, 0, ncv, nEigVecs); + raft::matrix::slice(handle, make_const_mdspan(eigenvectors_k_slice), S_matrix, coords); raft::matrix::fill(handle, beta_k.view(), zero); From 74908f25d09edf8fe7b5c8190be5dc77463b0e90 Mon Sep 17 00:00:00 2001 From: aamijar Date: Sat, 24 Aug 2024 04:27:02 +0000 Subject: [PATCH 15/23] resolving pr comments --- .../raft/sparse/solver/detail/lanczos.cuh | 41 +++++++++++-------- cpp/include/raft/sparse/solver/lanczos.cuh | 24 ++++++++++- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 980180dee6..9004d1f288 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -17,6 +17,7 @@ #pragma once // for cmath: +#include "raft/core/device_csr_matrix.hpp" #define _USE_MATH_DEFINES #include @@ -1544,7 +1545,8 @@ void lanczos_solve_ritz( template void lanczos_aux(raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, + // spectral::matrix::sparse_matrix_t const* A, + raft::device_csr_matrix_view A, raft::device_matrix_view V, raft::device_matrix_view u, raft::device_matrix_view alpha, @@ -1558,7 +1560,8 @@ void lanczos_aux(raft::resources const& handle, { auto stream = resource::get_cuda_stream(handle); - index_type_t n = A->nrows_; + auto A_structure = A.get_structure_view(); + index_type_t n = A_structure.get_n_rows(); raft::copy(v.data_handle(), &(V(start_idx, 0)), n, stream); @@ -1567,12 +1570,12 @@ void lanczos_aux(raft::resources const& handle, auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; raft::sparse::detail::cusparsecreatecsr(&cusparse_A, - A->nrows_, - A->ncols_, - A->nnz_, - const_cast(A->row_offsets_), - const_cast(A->col_indices_), - const_cast(A->values_)); + A_structure.get_n_rows(), + A_structure.get_n_cols(), + A_structure.get_nnz(), + const_cast(A_structure.get_indptr().data()), + const_cast(A_structure.get_indices().data()), + const_cast(A_structure.get_elements().data())); cusparseDnVecDescr_t cusparse_v; 
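// Descriptor setup: `cusparse_A` wraps the CSR structure and values, and the
// dense-vector descriptors for `v` and `u` wrap raw device pointers; together
// they drive cusparseSpMV (u = 1 * A * v + 0 * u) inside the Lanczos loop, so
// they are created once and reused across iterations.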
cusparseDnVecDescr_t cusparse_u; @@ -1681,7 +1684,7 @@ void lanczos_aux(raft::resources const& handle, template int lanczos_smallest(raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, + raft::device_csr_matrix_view A, int nEigVecs, int maxIter, int restartIter, @@ -1691,7 +1694,8 @@ int lanczos_smallest(raft::resources const& handle, value_type_t* v0, uint64_t seed) { - int n = A->nrows_; + auto A_structure = A.structure_view(); + int n = A_structure.get_n_rows(); int ncv = restartIter; auto stream = resource::get_cuda_stream(handle); @@ -1860,13 +1864,16 @@ int lanczos_smallest(raft::resources const& handle, auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; + // input_config.a_indptr = const_cast(x_structure.get_indptr().data()); + // input_config.a_indices = const_cast(x_structure.get_indices().data()); + // input_config.a_data = const_cast(x.get_elements().data()); raft::sparse::detail::cusparsecreatecsr(&cusparse_A, - A->nrows_, - A->ncols_, - A->nnz_, - const_cast(A->row_offsets_), - const_cast(A->col_indices_), - const_cast(A->values_)); + A_structure.get_n_rows(), + A_structure.get_n_cols(), + A_structure.get_nnz(), + const_cast(A_structure.get_indptr().data()), + const_cast(A_structure.get_indices().data()), + const_cast(A_structure.get_elements().data())); cusparseDnVecDescr_t cusparse_v; cusparseDnVecDescr_t cusparse_u; @@ -2051,7 +2058,7 @@ int lanczos_smallest(raft::resources const& handle, template auto lanczos_compute_smallest_eigenvectors( raft::resources const& handle, - raft::spectral::matrix::sparse_matrix_t const& A, + raft::device_csr_matrix_view A, lanczos_solver_config const& config, raft::device_vector_view v0, raft::device_vector_view eigenvalues, diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index 5241179543..218b4eb806 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -37,8 +37,30 @@ auto lanczos_compute_smallest_eigenvectors( raft::device_vector_view eigenvalues, raft::device_matrix_view eigenvectors) -> int { + // auto c_structure = raft::make_device_compressed_structure_view( + // c_indptr_d.data(), + // c_indices_d.data(), + // params.m, + // params.n, + // static_cast(c_indices_d.size())); + + // auto mask = + // raft::core::bitmap_view(bitmap_d.data(), params.m, params.n); + + // auto c = raft::make_device_csr_matrix_view(c_data_d.data(), c_structure); + + // FIXME: move out of function + auto csr_structure = raft::make_device_compressed_structure_view( + A.row_offsets_, + A.col_indices_, + A.ncols_, + A.nrows_, + static_cast(A.nnz_)); + + auto csr_matrix = raft::make_device_matrix_view(A.values_, csr_structure); + return detail::lanczos_compute_smallest_eigenvectors( - handle, A, config, v0, eigenvalues, eigenvectors); + handle, csr_matrix, config, v0, eigenvalues, eigenvectors); } /** From 7b311084e6d924a16d1378c21e27aba362e20a8e Mon Sep 17 00:00:00 2001 From: aamijar Date: Tue, 27 Aug 2024 19:42:49 +0000 Subject: [PATCH 16/23] resolving pr comments --- .../raft/sparse/solver/detail/lanczos.cuh | 100 +++++++++--------- cpp/include/raft/sparse/solver/lanczos.cuh | 23 ++-- 2 files changed, 67 insertions(+), 56 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 9004d1f288..e4af9372d5 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh 
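The substantive fix in this commit is reading the CSR values from the matrix view instead of its structure view: `structure_view()` exposes only the sparsity pattern (`get_indptr()`, `get_indices()`, the dimensions and `get_nnz()`), while the values are reached through `get_elements()` on the matrix view itself. A minimal access sketch using the names from this patch:

  auto A_structure = A.structure_view();
  auto n_rows = A_structure.get_n_rows();          // shape and nnz live on the structure
  auto* indptr = A_structure.get_indptr().data();  // row offsets (const, hence the const_casts)
  auto* values = A.get_elements().data();          // values live on the matrix view

which is why the `A_structure.get_elements()` calls below are replaced with `A.get_elements()`.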
@@ -17,10 +17,10 @@ #pragma once // for cmath: -#include "raft/core/device_csr_matrix.hpp" #define _USE_MATH_DEFINES #include +#include #include #include #include @@ -1544,24 +1544,25 @@ void lanczos_solve_ritz( } template -void lanczos_aux(raft::resources const& handle, - // spectral::matrix::sparse_matrix_t const* A, - raft::device_csr_matrix_view A, - raft::device_matrix_view V, - raft::device_matrix_view u, - raft::device_matrix_view alpha, - raft::device_matrix_view beta, - int start_idx, - int end_idx, - int ncv, - raft::device_matrix_view v, - raft::device_matrix_view uu, - raft::device_matrix_view vv) +void lanczos_aux( + raft::resources const& handle, + // spectral::matrix::sparse_matrix_t const* A, + raft::device_csr_matrix_view A, + raft::device_matrix_view V, + raft::device_matrix_view u, + raft::device_matrix_view alpha, + raft::device_matrix_view beta, + int start_idx, + int end_idx, + int ncv, + raft::device_matrix_view v, + raft::device_matrix_view uu, + raft::device_matrix_view vv) { auto stream = resource::get_cuda_stream(handle); - auto A_structure = A.get_structure_view(); - index_type_t n = A_structure.get_n_rows(); + auto A_structure = A.structure_view(); + index_type_t n = A_structure.get_n_rows(); raft::copy(v.data_handle(), &(V(start_idx, 0)), n, stream); @@ -1569,13 +1570,14 @@ void lanczos_aux(raft::resources const& handle, auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; - raft::sparse::detail::cusparsecreatecsr(&cusparse_A, - A_structure.get_n_rows(), - A_structure.get_n_cols(), - A_structure.get_nnz(), - const_cast(A_structure.get_indptr().data()), - const_cast(A_structure.get_indices().data()), - const_cast(A_structure.get_elements().data())); + raft::sparse::detail::cusparsecreatecsr( + &cusparse_A, + A_structure.get_n_rows(), + A_structure.get_n_cols(), + A_structure.get_nnz(), + const_cast(A_structure.get_indptr().data()), + const_cast(A_structure.get_indices().data()), + const_cast(A.get_elements().data())); cusparseDnVecDescr_t cusparse_v; cusparseDnVecDescr_t cusparse_u; @@ -1683,21 +1685,22 @@ void lanczos_aux(raft::resources const& handle, } template -int lanczos_smallest(raft::resources const& handle, - raft::device_csr_matrix_view A, - int nEigVecs, - int maxIter, - int restartIter, - value_type_t tol, - value_type_t* eigVals_dev, - value_type_t* eigVecs_dev, - value_type_t* v0, - uint64_t seed) +int lanczos_smallest( + raft::resources const& handle, + raft::device_csr_matrix_view A, + int nEigVecs, + int maxIter, + int restartIter, + value_type_t tol, + value_type_t* eigVals_dev, + value_type_t* eigVecs_dev, + value_type_t* v0, + uint64_t seed) { auto A_structure = A.structure_view(); - int n = A_structure.get_n_rows(); - int ncv = restartIter; - auto stream = resource::get_cuda_stream(handle); + int n = A_structure.get_n_rows(); + int ncv = restartIter; + auto stream = resource::get_cuda_stream(handle); std::cout << std::fixed << std::setprecision(7); // Set precision to 10 decimal places @@ -1864,16 +1867,17 @@ int lanczos_smallest(raft::resources const& handle, auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; - // input_config.a_indptr = const_cast(x_structure.get_indptr().data()); - // input_config.a_indices = const_cast(x_structure.get_indices().data()); - // input_config.a_data = const_cast(x.get_elements().data()); - raft::sparse::detail::cusparsecreatecsr(&cusparse_A, - A_structure.get_n_rows(), - A_structure.get_n_cols(), - A_structure.get_nnz(), - 
const_cast(A_structure.get_indptr().data()), - const_cast(A_structure.get_indices().data()), - const_cast(A_structure.get_elements().data())); + // input_config.a_indptr = const_cast(x_structure.get_indptr().data()); + // input_config.a_indices = const_cast(x_structure.get_indices().data()); + // input_config.a_data = const_cast(x.get_elements().data()); + raft::sparse::detail::cusparsecreatecsr( + &cusparse_A, + A_structure.get_n_rows(), + A_structure.get_n_cols(), + A_structure.get_nnz(), + const_cast(A_structure.get_indptr().data()), + const_cast(A_structure.get_indices().data()), + const_cast(A.get_elements().data())); cusparseDnVecDescr_t cusparse_v; cusparseDnVecDescr_t cusparse_u; @@ -2058,14 +2062,14 @@ int lanczos_smallest(raft::resources const& handle, template auto lanczos_compute_smallest_eigenvectors( raft::resources const& handle, - raft::device_csr_matrix_view A, + raft::device_csr_matrix_view A, lanczos_solver_config const& config, raft::device_vector_view v0, raft::device_vector_view eigenvalues, raft::device_matrix_view eigenvectors) -> int { return lanczos_smallest(handle, - &A, + A, config.n_components, config.max_iterations, config.ncv, diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index 218b4eb806..d65a675dac 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -48,16 +48,23 @@ auto lanczos_compute_smallest_eigenvectors( // raft::core::bitmap_view(bitmap_d.data(), params.m, params.n); // auto c = raft::make_device_csr_matrix_view(c_data_d.data(), c_structure); - + // FIXME: move out of function - auto csr_structure = raft::make_device_compressed_structure_view( - A.row_offsets_, - A.col_indices_, - A.ncols_, - A.nrows_, - static_cast(A.nnz_)); + IndexTypeT ncols = A.ncols_; + IndexTypeT nrows = A.nrows_; + IndexTypeT nnz = A.nnz_; + + auto csr_structure = + raft::make_device_compressed_structure_view( + const_cast(A.row_offsets_), + const_cast(A.col_indices_), + ncols, + nrows, + nnz); - auto csr_matrix = raft::make_device_matrix_view(A.values_, csr_structure); + auto csr_matrix = + raft::make_device_csr_matrix_view( + const_cast(A.values_), csr_structure); return detail::lanczos_compute_smallest_eigenvectors( handle, csr_matrix, config, v0, eigenvalues, eigenvectors); From 2cdcc663a285faf2da425425cc932b06ceffa674 Mon Sep 17 00:00:00 2001 From: aamijar Date: Wed, 28 Aug 2024 17:02:16 +0000 Subject: [PATCH 17/23] resolving pr comments --- .../raft/sparse/solver/detail/lanczos.cuh | 312 +++++++++--------- 1 file changed, 154 insertions(+), 158 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index e4af9372d5..d8499e0749 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -1498,27 +1498,27 @@ RAFT_KERNEL kernel_clamp_down_vector(T* vec, T threshold, int size) if (idx < size) { vec[idx] = (fabs(vec[idx]) < threshold) ? 
0 : vec[idx]; } } -template +template void lanczos_solve_ritz( raft::resources const& handle, - raft::device_matrix_view alpha, - raft::device_matrix_view beta, - std::optional> beta_k, - index_type_t k, + raft::device_matrix_view alpha, + raft::device_matrix_view beta, + std::optional> beta_k, + IndexTypeT k, int which, int ncv, - raft::device_matrix_view eigenvectors, - raft::device_vector_view eigenvalues) + raft::device_matrix_view eigenvectors, + raft::device_vector_view eigenvalues) { auto stream = resource::get_cuda_stream(handle); - value_type_t zero = 0; + ValueTypeT zero = 0; auto triangular_matrix = - raft::make_device_matrix(handle, ncv, ncv); + raft::make_device_matrix(handle, ncv, ncv); raft::matrix::fill(handle, triangular_matrix.view(), zero); - raft::device_vector_view alphaVec = - raft::make_device_vector_view(alpha.data_handle(), ncv); + raft::device_vector_view alphaVec = + raft::make_device_vector_view(alpha.data_handle(), ncv); raft::matrix::set_diagonal(handle, alphaVec, triangular_matrix.view()); // raft::matrix::initializeDiagonalMatrix( @@ -1526,43 +1526,41 @@ void lanczos_solve_ritz( int blockSize = 256; int numBlocks = (ncv + blockSize - 1) / blockSize; - kernel_triangular_populate + kernel_triangular_populate <<>>(triangular_matrix.data_handle(), beta.data_handle(), ncv); if (beta_k) { int threadsPerBlock = 256; int blocksPerGrid = (k + threadsPerBlock - 1) / threadsPerBlock; - kernel_triangular_beta_k<<>>( + kernel_triangular_beta_k<<>>( triangular_matrix.data_handle(), beta_k.value().data_handle(), (int)k, ncv); } auto triangular_matrix_view = - raft::make_device_matrix_view( + raft::make_device_matrix_view( triangular_matrix.data_handle(), ncv, ncv); raft::linalg::eig_dc(handle, triangular_matrix_view, eigenvectors, eigenvalues); } -template -void lanczos_aux( - raft::resources const& handle, - // spectral::matrix::sparse_matrix_t const* A, - raft::device_csr_matrix_view A, - raft::device_matrix_view V, - raft::device_matrix_view u, - raft::device_matrix_view alpha, - raft::device_matrix_view beta, - int start_idx, - int end_idx, - int ncv, - raft::device_matrix_view v, - raft::device_matrix_view uu, - raft::device_matrix_view vv) +template +void lanczos_aux(raft::resources const& handle, + raft::device_csr_matrix_view A, + raft::device_matrix_view V, + raft::device_matrix_view u, + raft::device_matrix_view alpha, + raft::device_matrix_view beta, + int start_idx, + int end_idx, + int ncv, + raft::device_matrix_view v, + raft::device_matrix_view uu, + raft::device_matrix_view vv) { auto stream = resource::get_cuda_stream(handle); auto A_structure = A.structure_view(); - index_type_t n = A_structure.get_n_rows(); + IndexTypeT n = A_structure.get_n_rows(); raft::copy(v.data_handle(), &(V(start_idx, 0)), n, stream); @@ -1570,22 +1568,21 @@ void lanczos_aux( auto cusparse_h = resource::get_cusparse_handle(handle); cusparseSpMatDescr_t cusparse_A; - raft::sparse::detail::cusparsecreatecsr( - &cusparse_A, - A_structure.get_n_rows(), - A_structure.get_n_cols(), - A_structure.get_nnz(), - const_cast(A_structure.get_indptr().data()), - const_cast(A_structure.get_indices().data()), - const_cast(A.get_elements().data())); + raft::sparse::detail::cusparsecreatecsr(&cusparse_A, + A_structure.get_n_rows(), + A_structure.get_n_cols(), + A_structure.get_nnz(), + const_cast(A_structure.get_indptr().data()), + const_cast(A_structure.get_indices().data()), + const_cast(A.get_elements().data())); cusparseDnVecDescr_t cusparse_v; cusparseDnVecDescr_t cusparse_u; 
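  // cuSPARSE generic SpMV flow used throughout lanczos_aux: wrap the CSR
  // matrix and the dense in/out vectors in descriptors, query the required
  // workspace once with cusparsespmv_buffersize, then reuse that buffer for
  // every u = A * v product inside the iteration loop, so the hot loop does
  // no per-iteration allocation.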
raft::sparse::detail::cusparsecreatednvec(&cusparse_v, n, v.data_handle()); raft::sparse::detail::cusparsecreatednvec(&cusparse_u, n, u.data_handle()); - value_type_t one = 1; - value_type_t zero = 0; + ValueTypeT one = 1; + ValueTypeT zero = 0; size_t bufferSize; raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -1597,7 +1594,7 @@ void lanczos_aux( CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize, stream); - auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); + auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); for (int i = start_idx; i < end_idx; i++) { raft::sparse::detail::cusparsespmv(cusparse_h, @@ -1612,20 +1609,20 @@ void lanczos_aux( stream); auto alpha_i = raft::make_device_scalar_view(&alpha(0, i)); - auto v_vector = raft::make_device_vector_view(v.data_handle(), n); - auto u_vector = raft::make_device_vector_view(u.data_handle(), n); + auto v_vector = raft::make_device_vector_view(v.data_handle(), n); + auto u_vector = raft::make_device_vector_view(u.data_handle(), n); raft::linalg::dot(handle, v_vector, u_vector, alpha_i); raft::matrix::fill(handle, vv, zero); auto cublas_h = resource::get_cublas_handle(handle); - value_type_t alpha_i_host = 0; - value_type_t b = 0; - value_type_t mone = -1; + ValueTypeT alpha_i_host = 0; + ValueTypeT b = 0; + ValueTypeT mone = -1; - raft::copy(&b, &beta(0, (i - 1 + ncv) % ncv), 1, stream); - raft::copy(&alpha_i_host, &(alpha(0, i)), 1, stream); + raft::copy(&b, &beta(0, (i - 1 + ncv) % ncv), 1, stream); + raft::copy(&alpha_i_host, &(alpha(0, i)), 1, stream); raft::linalg::axpy(handle, n, &alpha_i_host, v.data_handle(), 1, vv.data_handle(), 1, stream); raft::linalg::axpy(handle, n, &b, &V((i - 1 + ncv) % ncv, 0), 1, vv.data_handle(), 1, stream); @@ -1662,40 +1659,40 @@ void lanczos_aux( auto uu_i = raft::make_device_scalar_view(&uu(0, i)); raft::linalg::add(handle, make_const_mdspan(alpha_i), make_const_mdspan(uu_i), alpha_i); - kernel_clamp_down<<<1, 1, 0, stream>>>(alpha_i.data_handle(), static_cast(1e-9)); + kernel_clamp_down<<<1, 1, 0, stream>>>(alpha_i.data_handle(), static_cast(1e-9)); - raft::linalg::nrm2(handle, n, u.data_handle(), 1, &beta(0, i), stream); + raft::linalg::nrm2(handle, n, u.data_handle(), 1, &beta(0, i), stream); int blockSize = 256; int numBlocks = (n + blockSize - 1) / blockSize; kernel_clamp_down_vector<<>>( - u.data_handle(), static_cast(1e-7), n); + u.data_handle(), static_cast(1e-7), n); - kernel_clamp_down<<<1, 1, 0, stream>>>(&beta(0, i), static_cast(1e-6)); + kernel_clamp_down<<<1, 1, 0, stream>>>(&beta(0, i), static_cast(1e-6)); if (i >= end_idx - 1) { break; } int threadsPerBlock = 256; int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; - kernel_normalize<<>>( + kernel_normalize<<>>( u.data_handle(), beta.data_handle(), i, n, v.data_handle(), V.data_handle(), n); } } -template -int lanczos_smallest( +template +auto lanczos_smallest( raft::resources const& handle, - raft::device_csr_matrix_view A, + raft::device_csr_matrix_view A, int nEigVecs, int maxIter, int restartIter, - value_type_t tol, - value_type_t* eigVals_dev, - value_type_t* eigVecs_dev, - value_type_t* v0, - uint64_t seed) + ValueTypeT tol, + ValueTypeT* eigVals_dev, + ValueTypeT* eigVecs_dev, + ValueTypeT* v0, + uint64_t seed) -> int { auto A_structure = A.structure_view(); int n = A_structure.get_n_rows(); @@ -1704,25 +1701,25 @@ int lanczos_smallest( std::cout << std::fixed << std::setprecision(7); // Set precision to 10 decimal places - 
raft::device_matrix V = - raft::make_device_matrix(handle, ncv, n); - raft::device_matrix_view V_0_view = - raft::make_device_matrix_view(V.data_handle(), 1, n); // First Row V[0] - raft::device_matrix_view v0_view = - raft::make_device_matrix_view(v0, 1, n); + raft::device_matrix V = + raft::make_device_matrix(handle, ncv, n); + raft::device_matrix_view V_0_view = + raft::make_device_matrix_view(V.data_handle(), 1, n); // First Row V[0] + raft::device_matrix_view v0_view = + raft::make_device_matrix_view(v0, 1, n); - raft::device_matrix u = - raft::make_device_matrix(handle, 1, n); + raft::device_matrix u = + raft::make_device_matrix(handle, 1, n); raft::copy(u.data_handle(), v0, n, stream); - auto cublas_h = resource::get_cublas_handle(handle); - value_type_t v0nrm = 0; + auto cublas_h = resource::get_cublas_handle(handle); + ValueTypeT v0nrm = 0; raft::linalg::nrm2(handle, n, v0_view.data_handle(), 1, &v0nrm, stream); - raft::device_scalar v0nrm_scalar = raft::make_device_scalar(handle, v0nrm); + raft::device_scalar v0nrm_scalar = raft::make_device_scalar(handle, v0nrm); - raft::device_vector_view v0_vector_const = - raft::make_device_vector_view(v0, n); + raft::device_vector_view v0_vector_const = + raft::make_device_vector_view(v0, n); raft::linalg::unary_op( handle, @@ -1730,15 +1727,15 @@ int lanczos_smallest( V_0_view, [device_scalar = v0nrm_scalar.data_handle()] __device__(auto y) { return y / *device_scalar; }); - auto alpha = raft::make_device_matrix(handle, 1, ncv); - auto beta = raft::make_device_matrix(handle, 1, ncv); - value_type_t zero = 0; + auto alpha = raft::make_device_matrix(handle, 1, ncv); + auto beta = raft::make_device_matrix(handle, 1, ncv); + ValueTypeT zero = 0; raft::matrix::fill(handle, alpha.view(), zero); raft::matrix::fill(handle, beta.view(), zero); - auto v = raft::make_device_matrix(handle, 1, n); - auto aux_uu = raft::make_device_matrix(handle, 1, ncv); - auto vv = raft::make_device_matrix(handle, 1, n); + auto v = raft::make_device_matrix(handle, 1, n); + auto aux_uu = raft::make_device_matrix(handle, 1, ncv); + auto vv = raft::make_device_matrix(handle, 1, n); lanczos_aux(handle, A, @@ -1754,71 +1751,70 @@ int lanczos_smallest( vv.view()); auto eigenvectors = - raft::make_device_matrix(handle, ncv, ncv); - auto eigenvalues = raft::make_device_vector(handle, ncv); - - lanczos_solve_ritz(handle, - alpha.view(), - beta.view(), - std::nullopt, - nEigVecs, - 0, - ncv, - eigenvectors.view(), - eigenvalues.view()); - - auto eigenvectors_k = raft::make_device_matrix_view( + raft::make_device_matrix(handle, ncv, ncv); + auto eigenvalues = raft::make_device_vector(handle, ncv); + + lanczos_solve_ritz(handle, + alpha.view(), + beta.view(), + std::nullopt, + nEigVecs, + 0, + ncv, + eigenvectors.view(), + eigenvalues.view()); + + auto eigenvectors_k = raft::make_device_matrix_view( eigenvectors.data_handle(), ncv, nEigVecs); - raft::device_vector_view eigenvalues_k = - raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); + raft::device_vector_view eigenvalues_k = + raft::make_device_vector_view(eigenvalues.data_handle(), nEigVecs); - auto ritz_eigenvectors = raft::make_device_matrix_view( - eigVecs_dev, n, nEigVecs); + auto ritz_eigenvectors = + raft::make_device_matrix_view(eigVecs_dev, n, nEigVecs); auto V_T = - raft::make_device_matrix_view(V.data_handle(), n, ncv); - raft::linalg::gemm( + raft::make_device_matrix_view(V.data_handle(), n, ncv); + raft::linalg::gemm( handle, V_T, eigenvectors_k, ritz_eigenvectors); - auto s = 
raft::make_device_vector(handle, nEigVecs); + auto s = raft::make_device_vector(handle, nEigVecs); auto eigenvectors_k_slice = - raft::make_device_matrix_view( + raft::make_device_matrix_view( eigenvectors.data_handle(), ncv, nEigVecs); - auto S_matrix = raft::make_device_matrix_view( + auto S_matrix = raft::make_device_matrix_view( s.data_handle(), 1, nEigVecs); - raft::matrix::slice_coordinates coords(ncv - 1, 0, ncv, nEigVecs); + raft::matrix::slice_coordinates coords(ncv - 1, 0, ncv, nEigVecs); raft::matrix::slice(handle, make_const_mdspan(eigenvectors_k_slice), S_matrix, coords); - auto beta_k = raft::make_device_vector(handle, nEigVecs); + auto beta_k = raft::make_device_vector(handle, nEigVecs); raft::matrix::fill(handle, beta_k.view(), zero); - auto beta_scalar = - raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); + auto beta_scalar = raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); - value_type_t res = 0; + ValueTypeT res = 0; raft::linalg::nrm2(handle, nEigVecs, beta_k.data_handle(), 1, &res, stream); std::cout << "res " << res << std::endl; - auto uu = raft::make_device_matrix(handle, 0, nEigVecs); + auto uu = raft::make_device_matrix(handle, 0, nEigVecs); int iter = ncv; while (res > tol && iter < maxIter) { - auto beta_view = raft::make_device_matrix_view( + auto beta_view = raft::make_device_matrix_view( beta.data_handle(), 1, nEigVecs); raft::matrix::fill(handle, beta_view, zero); raft::copy(alpha.data_handle(), eigenvalues_k.data_handle(), nEigVecs, stream); auto x_T = - raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); + raft::make_device_matrix_view(ritz_eigenvectors.data_handle(), nEigVecs, n); raft::copy(V.data_handle(), x_T.data_handle(), nEigVecs * n, stream); - value_type_t one = 1; - value_type_t mone = -1; + ValueTypeT one = 1; + ValueTypeT mone = -1; // Using raft::linalg::gemv leads to Reason=7:CUBLAS_STATUS_INVALID_VALUE raft::linalg::detail::cublasgemv(cublas_h, @@ -1850,13 +1846,13 @@ int lanczos_smallest( stream); auto V_0_view = - raft::make_device_matrix_view(V.data_handle() + (nEigVecs * n), 1, n); - value_type_t unrm = 0; + raft::make_device_matrix_view(V.data_handle() + (nEigVecs * n), 1, n); + ValueTypeT unrm = 0; raft::linalg::nrm2(handle, n, u.data_handle(), 1, &unrm, stream); - raft::device_scalar unrm_scalar = raft::make_device_scalar(handle, unrm); + raft::device_scalar unrm_scalar = raft::make_device_scalar(handle, unrm); - auto u_vector_const = raft::make_device_vector_view(u.data_handle(), n); + auto u_vector_const = raft::make_device_vector_view(u.data_handle(), n); raft::linalg::unary_op(handle, u_vector_const, @@ -1875,16 +1871,16 @@ int lanczos_smallest( A_structure.get_n_rows(), A_structure.get_n_cols(), A_structure.get_nnz(), - const_cast(A_structure.get_indptr().data()), - const_cast(A_structure.get_indices().data()), - const_cast(A.get_elements().data())); + const_cast(A_structure.get_indptr().data()), + const_cast(A_structure.get_indices().data()), + const_cast(A.get_elements().data())); cusparseDnVecDescr_t cusparse_v; cusparseDnVecDescr_t cusparse_u; raft::sparse::detail::cusparsecreatednvec(&cusparse_v, n, V_0_view.data_handle()); raft::sparse::detail::cusparsecreatednvec(&cusparse_u, n, u.data_handle()); - value_type_t zero = 0; + ValueTypeT zero = 0; size_t bufferSize; raft::sparse::detail::cusparsespmv_buffersize(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -1896,7 +1892,7 @@ int 
lanczos_smallest( CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize, stream); - auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); + auto cusparse_spmv_buffer = raft::make_device_vector(handle, bufferSize); raft::sparse::detail::cusparsespmv(cusparse_h, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -1909,10 +1905,10 @@ int lanczos_smallest( cusparse_spmv_buffer.data_handle(), stream); - auto alpha_k = raft::make_device_scalar_view(alpha.data_handle() + nEigVecs); + auto alpha_k = raft::make_device_scalar_view(alpha.data_handle() + nEigVecs); auto V_0_view_vector = - raft::make_device_vector_view(V_0_view.data_handle(), n); - auto u_view_vector = raft::make_device_vector_view(u.data_handle(), n); + raft::make_device_vector_view(V_0_view.data_handle(), n); + auto u_view_vector = raft::make_device_vector_view(u.data_handle(), n); raft::linalg::dot(handle, V_0_view_vector, u_view_vector, alpha_k); @@ -1921,24 +1917,24 @@ int lanczos_smallest( kernel_subtract_and_scale<<>>( u.data_handle(), V_0_view.data_handle(), alpha_k.data_handle(), n); - auto temp = raft::make_device_vector(handle, n); + auto temp = raft::make_device_vector(handle, n); - auto V_k = raft::make_device_matrix_view( + auto V_k = raft::make_device_matrix_view( V.data_handle(), nEigVecs, n); auto V_k_T = - raft::make_device_matrix(handle, n, nEigVecs); + raft::make_device_matrix(handle, n, nEigVecs); raft::linalg::transpose(handle, V_k, V_k_T.view()); - value_type_t three = 3; - value_type_t two = 2; + ValueTypeT three = 3; + ValueTypeT two = 2; - std::vector M = {1, 2, 3, 4, 5, 6}; - std::vector vec = {1, 1}; + std::vector M = {1, 2, 3, 4, 5, 6}; + std::vector vec = {1, 1}; - auto M_dev = raft::make_device_matrix(handle, 2, 3); - auto vec_dev = raft::make_device_vector(handle, 2); - auto out = raft::make_device_vector(handle, 3); + auto M_dev = raft::make_device_matrix(handle, 2, 3); + auto vec_dev = raft::make_device_vector(handle, 2); + auto out = raft::make_device_vector(handle, 3); raft::copy(M_dev.data_handle(), M.data(), 6, stream); raft::copy(vec_dev.data_handle(), vec.data(), 2, stream); @@ -1970,15 +1966,15 @@ int lanczos_smallest( 1, stream); - auto one_scalar = raft::make_device_scalar(handle, 1); - kernel_subtract_and_scale<<>>( + auto one_scalar = raft::make_device_scalar(handle, 1); + kernel_subtract_and_scale<<>>( u.data_handle(), temp.data_handle(), one_scalar.data_handle(), n); - raft::linalg::nrm2( + raft::linalg::nrm2( handle, n, u.data_handle(), 1, &((beta.view())(0, nEigVecs)), stream); - auto V_kplus1 = raft::make_device_vector_view(&(V.view()(nEigVecs + 1, 0)), n); - auto u_vector = raft::make_device_vector_view(u.data_handle(), n); + auto V_kplus1 = raft::make_device_vector_view(&(V.view()(nEigVecs + 1, 0)), n); + auto u_vector = raft::make_device_vector_view(u.data_handle(), n); raft::linalg::unary_op(handle, u_vector, @@ -2000,39 +1996,39 @@ int lanczos_smallest( aux_uu.view(), vv.view()); iter += ncv - nEigVecs; - lanczos_solve_ritz(handle, - alpha.view(), - beta.view(), - beta_k.view(), - nEigVecs, - 0, - ncv, - eigenvectors.view(), - eigenvalues.view()); - auto eigenvectors_k = raft::make_device_matrix_view( + lanczos_solve_ritz(handle, + alpha.view(), + beta.view(), + beta_k.view(), + nEigVecs, + 0, + ncv, + eigenvectors.view(), + eigenvalues.view()); + auto eigenvectors_k = raft::make_device_matrix_view( eigenvectors.data_handle(), ncv, nEigVecs); - auto ritz_eigenvectors = raft::make_device_matrix_view( + auto ritz_eigenvectors = raft::make_device_matrix_view( eigVecs_dev, n, nEigVecs); - auto 
V_T = raft::make_device_matrix_view( - V.data_handle(), n, ncv); - raft::linalg::gemm( + auto V_T = + raft::make_device_matrix_view(V.data_handle(), n, ncv); + raft::linalg::gemm( handle, V_T, eigenvectors_k, ritz_eigenvectors); auto eigenvectors_k_slice = - raft::make_device_matrix_view( + raft::make_device_matrix_view( eigenvectors.data_handle(), ncv, nEigVecs); - auto S_matrix = raft::make_device_matrix_view( + auto S_matrix = raft::make_device_matrix_view( s.data_handle(), 1, nEigVecs); - raft::matrix::slice_coordinates coords(ncv - 1, 0, ncv, nEigVecs); + raft::matrix::slice_coordinates coords(ncv - 1, 0, ncv, nEigVecs); raft::matrix::slice(handle, make_const_mdspan(eigenvectors_k_slice), S_matrix, coords); raft::matrix::fill(handle, beta_k.view(), zero); auto beta_scalar = - raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); + raft::make_device_scalar_view(&((beta.view())(0, ncv - 1))); raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); From 11607227729910a713187f120512e827e975af01 Mon Sep 17 00:00:00 2001 From: aamijar Date: Mon, 9 Sep 2024 04:13:33 +0000 Subject: [PATCH 18/23] resolving pr comments --- cpp/include/raft/sparse/solver/lanczos.cuh | 31 ++++ cpp/include/raft_runtime/solver/lanczos.hpp | 46 +++-- .../raft_runtime/solver/lanczos_solver.cuh | 87 ++------- python/pylibraft/pylibraft/solver/lanczos.pyx | 175 ++++++++++++------ 4 files changed, 194 insertions(+), 145 deletions(-) diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index d65a675dac..adf8f33dca 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -28,6 +28,37 @@ namespace raft::sparse::solver { // Eigensolver // ========================================================= +template +auto lanczos_compute_smallest_eigenvectors( + raft::resources const& handle, + raft::device_vector_view rows, + raft::device_vector_view cols, + raft::device_vector_view vals, + lanczos_solver_config const& config, + raft::device_vector_view v0, + raft::device_vector_view eigenvalues, + raft::device_matrix_view eigenvectors) -> int +{ + IndexTypeT ncols = rows.extent(0) - 1; + IndexTypeT nrows = rows.extent(0) - 1; + IndexTypeT nnz = cols.extent(0); + + auto csr_structure = + raft::make_device_compressed_structure_view( + const_cast(rows.data_handle()), + const_cast(cols.data_handle()), + ncols, + nrows, + nnz); + + auto csr_matrix = + raft::make_device_csr_matrix_view( + const_cast(vals.data_handle()), csr_structure); + + return detail::lanczos_compute_smallest_eigenvectors( + handle, csr_matrix, config, v0, eigenvalues, eigenvectors); +} + template auto lanczos_compute_smallest_eigenvectors( raft::resources const& handle, diff --git a/cpp/include/raft_runtime/solver/lanczos.hpp b/cpp/include/raft_runtime/solver/lanczos.hpp index a08b5be394..4e043d345d 100644 --- a/cpp/include/raft_runtime/solver/lanczos.hpp +++ b/cpp/include/raft_runtime/solver/lanczos.hpp @@ -14,33 +14,45 @@ * limitations under the License. 
*/ +#include #include #include +#include #include namespace raft::runtime::solver { /** - * @defgroup rmat_runtime lanczos Runtime API + * @defgroup lanczos_runtime lanczos Runtime API * @{ */ -#define FUNC_DECL(IndexType, ValueType) \ - void lanczos_solver(const raft::resources& handle, \ - IndexType* rows, \ - IndexType* cols, \ - ValueType* vals, \ - int nnz, \ - int n, \ - int n_components, \ - int max_iterations, \ - int ncv, \ - ValueType tolerance, \ - uint64_t seed, \ - ValueType* v0, \ - ValueType* eigenvalues, \ - ValueType* eigenvectors) +#define FUNC_DECL(IndexType, ValueType) \ + void lanczos_solver(const raft::resources& handle, \ + raft::device_vector_view rows, \ + raft::device_vector_view cols, \ + raft::device_vector_view vals, \ + raft::sparse::solver::lanczos_solver_config config, \ + raft::device_vector_view v0, \ + raft::device_vector_view eigenvalues, \ + raft::device_matrix_view eigenvectors) + +// #define FUNC_DECL(IndexType, ValueType) \ +// void lanczos_solver(const raft::resources& handle, \ +// IndexType* rows, \ +// IndexType* cols, \ +// ValueType* vals, \ +// int nnz, \ +// int n, \ +// int n_components, \ +// int max_iterations, \ +// int ncv, \ +// ValueType tolerance, \ +// uint64_t seed, \ +// ValueType* v0, \ +// ValueType* eigenvalues, \ +// ValueType* eigenvectors) FUNC_DECL(int, float); FUNC_DECL(int64_t, float); @@ -49,6 +61,6 @@ FUNC_DECL(int64_t, double); #undef FUNC_DECL -/** @} */ // end group rmat_runtime +/** @} */ // end group lanczos_runtime } // namespace raft::runtime::solver diff --git a/cpp/src/raft_runtime/solver/lanczos_solver.cuh b/cpp/src/raft_runtime/solver/lanczos_solver.cuh index f5ca2e3069..42b347e0e0 100644 --- a/cpp/src/raft_runtime/solver/lanczos_solver.cuh +++ b/cpp/src/raft_runtime/solver/lanczos_solver.cuh @@ -15,80 +15,17 @@ */ #include -#include -#include -#include -#include -#include - -template -void run_lanczos_solver(const raft::resources& handle, - IndexType* rows, - IndexType* cols, - ValueType* vals, - int nnz, - int n, - int n_components, - int max_iterations, - int ncv, - ValueType tolerance, - uint64_t seed, - ValueType* v0, - ValueType* eigenvalues, - ValueType* eigenvectors) -{ - auto stream = raft::resource::get_cuda_stream(handle); - raft::device_vector_view rows_view = - raft::make_device_vector_view(rows, n + 1); - raft::device_vector_view cols_view = - raft::make_device_vector_view(cols, nnz); - raft::device_vector_view vals_view = - raft::make_device_vector_view(vals, nnz); - raft::device_vector_view v0_view = - raft::make_device_vector_view(v0, n); - raft::device_vector_view eigenvalues_view = - raft::make_device_vector_view(eigenvalues, n_components); - raft::device_matrix_view eigenvectors_view = - raft::make_device_matrix_view( - eigenvectors, n, n_components); - - raft::spectral::matrix::sparse_matrix_t const csr_m{ - handle, rows_view.data_handle(), cols_view.data_handle(), vals_view.data_handle(), n, nnz}; - raft::sparse::solver::lanczos_solver_config config{ - n_components, max_iterations, ncv, tolerance, seed}; - raft::sparse::solver::lanczos_compute_smallest_eigenvectors( - handle, csr_m, config, v0_view, eigenvalues_view, eigenvectors_view); -} - -#define FUNC_DEF(IndexType, ValueType) \ - void lanczos_solver(const raft::resources& handle, \ - IndexType* rows, \ - IndexType* cols, \ - ValueType* vals, \ - int nnz, \ - int n, \ - int n_components, \ - int max_iterations, \ - int ncv, \ - ValueType tolerance, \ - uint64_t seed, \ - ValueType* v0, \ - ValueType* eigenvalues, \ - ValueType* 
eigenvectors) \ - { \ - run_lanczos_solver(handle, \ - rows, \ - cols, \ - vals, \ - nnz, \ - n, \ - n_components, \ - max_iterations, \ - ncv, \ - tolerance, \ - seed, \ - v0, \ - eigenvalues, \ - eigenvectors); \ +#define FUNC_DEF(IndexType, ValueType) \ + void lanczos_solver(const raft::resources& handle, \ + raft::device_vector_view rows, \ + raft::device_vector_view cols, \ + raft::device_vector_view vals, \ + raft::sparse::solver::lanczos_solver_config config, \ + raft::device_vector_view v0, \ + raft::device_vector_view eigenvalues, \ + raft::device_matrix_view eigenvectors) \ + { \ + raft::sparse::solver::lanczos_compute_smallest_eigenvectors( \ + handle, rows, cols, vals, config, v0, eigenvalues, eigenvectors); \ } diff --git a/python/pylibraft/pylibraft/solver/lanczos.pyx b/python/pylibraft/pylibraft/solver/lanczos.pyx index 43b3bda221..78c9695f7c 100644 --- a/python/pylibraft/pylibraft/solver/lanczos.pyx +++ b/python/pylibraft/pylibraft/solver/lanczos.pyx @@ -22,20 +22,83 @@ import cupy as cp import numpy as np from cython.operator cimport dereference as deref -from libc.stdint cimport int64_t, uint64_t, uintptr_t +from libc.stdint cimport int64_t, uint32_t, uint64_t, uintptr_t from pylibraft.common import Handle, cai_wrapper, device_ndarray from pylibraft.common.handle import auto_sync_handle from libcpp cimport bool +from pylibraft.common.cpp.mdspan cimport ( + col_major, + device_matrix_view, + device_vector_view, + make_device_matrix_view, + make_device_vector_view, + row_major, +) from pylibraft.common.handle cimport device_resources from pylibraft.random.cpp.rng_state cimport RngState +cdef extern from "raft/sparse/solver/lanczos_types.hpp" \ + namespace "raft::sparse::solver" nogil: + + cdef cppclass lanczos_solver_config[IndexTypeT, ValueTypeT]: + int n_components + int max_iterations + int ncv + ValueTypeT tolerance + uint64_t seed + +cdef lanczos_solver_config[int, float] config_int_float +cdef lanczos_solver_config[int64_t, float] config_int64_float +cdef lanczos_solver_config[int, double] config_int_double +cdef lanczos_solver_config[int64_t, double] config_int64_double + cdef extern from "raft_runtime/solver/lanczos.hpp" \ namespace "raft::runtime::solver" nogil: + cdef void lanczos_solver( + const device_resources &handle, + device_vector_view[int64_t, uint32_t] rows, + device_vector_view[int64_t, uint32_t] cols, + device_vector_view[double, uint32_t] vals, + lanczos_solver_config[int64_t, double] config, + device_vector_view[double, uint32_t] v0, + device_vector_view[double, uint32_t] eigenvalues, + device_matrix_view[double, uint32_t, col_major] eigenvectors) except + + + cdef void lanczos_solver( + const device_resources &handle, + device_vector_view[int64_t, uint32_t] rows, + device_vector_view[int64_t, uint32_t] cols, + device_vector_view[float, uint32_t] vals, + lanczos_solver_config[int64_t, float] config, + device_vector_view[float, uint32_t] v0, + device_vector_view[float, uint32_t] eigenvalues, + device_matrix_view[float, uint32_t, col_major] eigenvectors) except + + + cdef void lanczos_solver( + const device_resources &handle, + device_vector_view[int, uint32_t] rows, + device_vector_view[int, uint32_t] cols, + device_vector_view[double, uint32_t] vals, + lanczos_solver_config[int, double] config, + device_vector_view[double, uint32_t] v0, + device_vector_view[double, uint32_t] eigenvalues, + device_matrix_view[double, uint32_t, col_major] eigenvectors) except + + + cdef void lanczos_solver( + const device_resources &handle, + device_vector_view[int, 
uint32_t] rows, + device_vector_view[int, uint32_t] cols, + device_vector_view[float, uint32_t] vals, + lanczos_solver_config[int, float] config, + device_vector_view[float, uint32_t] v0, + device_vector_view[float, uint32_t] eigenvalues, + device_matrix_view[float, uint32_t, col_major] eigenvectors) except + + cdef void lanczos_solver( const device_resources &handle, int64_t* rows, @@ -158,6 +221,7 @@ def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, N = A.shape[0] n = N + nnz = A.nnz rows_ptr = rows.data cols_ptr = cols.data @@ -193,73 +257,78 @@ def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, handle = handle if handle is not None else Handle() cdef device_resources *h = handle.getHandle() + if IndexType == np.int32 and ValueType == np.float32: + config_int_float.n_components = k + config_int_float.max_iterations = maxiter + config_int_float.ncv = ncv + config_int_float.tolerance = tol + config_int_float.seed = seed + elif IndexType == np.int64 and ValueType == np.float32: + config_int64_float.n_components = k + config_int64_float.max_iterations = maxiter + config_int64_float.ncv = ncv + config_int64_float.tolerance = tol + config_int64_float.seed = seed + elif IndexType == np.int32 and ValueType == np.float64: + config_int_double.n_components = k + config_int_double.max_iterations = maxiter + config_int_double.ncv = ncv + config_int_double.tolerance = tol + config_int_double.seed = seed + elif IndexType == np.int64 and ValueType == np.float64: + config_int64_double.n_components = k + config_int64_double.max_iterations = maxiter + config_int64_double.ncv = ncv + config_int64_double.tolerance = tol + config_int64_double.seed = seed + if IndexType == np.int32 and ValueType == np.float32: lanczos_solver( deref(h), - rows_ptr, - cols_ptr, - vals_ptr, - A.nnz, - N, - k, - maxiter, - ncv, - tol, - seed, - v0_ptr, - eigenvalues_ptr, - eigenvectors_ptr, + make_device_vector_view(rows_ptr, (N + 1)), + make_device_vector_view(cols_ptr, nnz), + make_device_vector_view(vals_ptr, nnz), + config_int_float, + make_device_vector_view(v0_ptr, N), + make_device_vector_view(eigenvalues_ptr, k), + make_device_matrix_view[float, uint32_t, col_major]( + eigenvectors_ptr, N, k), ) elif IndexType == np.int64 and ValueType == np.float32: lanczos_solver( deref(h), - rows_ptr, - cols_ptr, - vals_ptr, - A.nnz, - N, - k, - maxiter, - ncv, - tol, - seed, - v0_ptr, - eigenvalues_ptr, - eigenvectors_ptr, + make_device_vector_view(rows_ptr, (N + 1)), + make_device_vector_view(cols_ptr, nnz), + make_device_vector_view(vals_ptr, nnz), + config_int64_float, + make_device_vector_view(v0_ptr, N), + make_device_vector_view(eigenvalues_ptr, k), + make_device_matrix_view[float, uint32_t, col_major]( + eigenvectors_ptr, N, k), ) elif IndexType == np.int32 and ValueType == np.float64: lanczos_solver( deref(h), - rows_ptr, - cols_ptr, - vals_ptr, - A.nnz, - N, - k, - maxiter, - ncv, - tol, - seed, - v0_ptr, - eigenvalues_ptr, - eigenvectors_ptr, + make_device_vector_view(rows_ptr, (N + 1)), + make_device_vector_view(cols_ptr, nnz), + make_device_vector_view(vals_ptr, nnz), + config_int_double, + make_device_vector_view(v0_ptr, N), + make_device_vector_view(eigenvalues_ptr, k), + make_device_matrix_view[double, uint32_t, col_major]( + eigenvectors_ptr, N, k), ) elif IndexType == np.int64 and ValueType == np.float64: lanczos_solver( deref(h), - rows_ptr, - cols_ptr, - vals_ptr, - A.nnz, - N, - k, - maxiter, - ncv, - tol, - seed, - v0_ptr, - eigenvalues_ptr, - eigenvectors_ptr, + make_device_vector_view(rows_ptr, (N + 1)), + 
make_device_vector_view(cols_ptr, nnz), + make_device_vector_view(vals_ptr, nnz), + config_int64_double, + make_device_vector_view(v0_ptr, N), + make_device_vector_view(eigenvalues_ptr, k), + make_device_matrix_view[double, uint32_t, col_major]( + eigenvectors_ptr, N, k), ) else: raise ValueError("dtype IndexType=%s and ValueType=%s not supported" % From e473728b9cadff5377cb655b8586cac293b4bfff Mon Sep 17 00:00:00 2001 From: aamijar Date: Mon, 9 Sep 2024 04:24:52 +0000 Subject: [PATCH 19/23] resolving pr comments --- cpp/include/raft/sparse/solver/lanczos.cuh | 13 ------------- cpp/include/raft_runtime/solver/lanczos.hpp | 19 ++----------------- 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index adf8f33dca..82b0ec9e31 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -68,19 +68,6 @@ auto lanczos_compute_smallest_eigenvectors( raft::device_vector_view eigenvalues, raft::device_matrix_view eigenvectors) -> int { - // auto c_structure = raft::make_device_compressed_structure_view( - // c_indptr_d.data(), - // c_indices_d.data(), - // params.m, - // params.n, - // static_cast(c_indices_d.size())); - - // auto mask = - // raft::core::bitmap_view(bitmap_d.data(), params.m, params.n); - - // auto c = raft::make_device_csr_matrix_view(c_data_d.data(), c_structure); - - // FIXME: move out of function IndexTypeT ncols = A.ncols_; IndexTypeT nrows = A.nrows_; IndexTypeT nnz = A.nnz_; diff --git a/cpp/include/raft_runtime/solver/lanczos.hpp b/cpp/include/raft_runtime/solver/lanczos.hpp index 4e043d345d..02999e847d 100644 --- a/cpp/include/raft_runtime/solver/lanczos.hpp +++ b/cpp/include/raft_runtime/solver/lanczos.hpp @@ -14,9 +14,10 @@ * limitations under the License. 
*/ +#pragma once + #include #include -#include #include #include @@ -38,22 +39,6 @@ namespace raft::runtime::solver { raft::device_vector_view eigenvalues, \ raft::device_matrix_view eigenvectors) -// #define FUNC_DECL(IndexType, ValueType) \ -// void lanczos_solver(const raft::resources& handle, \ -// IndexType* rows, \ -// IndexType* cols, \ -// ValueType* vals, \ -// int nnz, \ -// int n, \ -// int n_components, \ -// int max_iterations, \ -// int ncv, \ -// ValueType tolerance, \ -// uint64_t seed, \ -// ValueType* v0, \ -// ValueType* eigenvalues, \ -// ValueType* eigenvectors) - FUNC_DECL(int, float); FUNC_DECL(int64_t, float); FUNC_DECL(int, double); From cc22a398195cbceb3ae3f153772295e483af6bcc Mon Sep 17 00:00:00 2001 From: aamijar Date: Mon, 9 Sep 2024 04:36:06 +0000 Subject: [PATCH 20/23] resolving pr comments --- python/pylibraft/pylibraft/solver/lanczos.pyx | 99 +++---------------- 1 file changed, 15 insertions(+), 84 deletions(-) diff --git a/python/pylibraft/pylibraft/solver/lanczos.pyx b/python/pylibraft/pylibraft/solver/lanczos.pyx index 78c9695f7c..c0788645dd 100644 --- a/python/pylibraft/pylibraft/solver/lanczos.pyx +++ b/python/pylibraft/pylibraft/solver/lanczos.pyx @@ -99,70 +99,6 @@ cdef extern from "raft_runtime/solver/lanczos.hpp" \ device_vector_view[float, uint32_t] eigenvalues, device_matrix_view[float, uint32_t, col_major] eigenvectors) except + - cdef void lanczos_solver( - const device_resources &handle, - int64_t* rows, - int64_t* cols, - double* vals, - int nnz, - int n, - int n_components, - int max_iterations, - int ncv, - double tolerance, - uint64_t seed, - double* v0, - double* eigenvalues, - double* eigenvectors) except + - - cdef void lanczos_solver( - const device_resources &handle, - int64_t* rows, - int64_t* cols, - float* vals, - int nnz, - int n, - int n_components, - int max_iterations, - int ncv, - float tolerance, - uint64_t seed, - float* v0, - float* eigenvalues, - float* eigenvectors) except + - - cdef void lanczos_solver( - const device_resources &handle, - int* rows, - int* cols, - double* vals, - int nnz, - int n, - int n_components, - int max_iterations, - int ncv, - double tolerance, - uint64_t seed, - double* v0, - double* eigenvalues, - double* eigenvectors) except + - - cdef void lanczos_solver( - const device_resources &handle, - int* rows, - int* cols, - float* vals, - int nnz, - int n, - int n_components, - int max_iterations, - int ncv, - float tolerance, - uint64_t seed, - float* v0, - float* eigenvalues, - float* eigenvectors) except + - @auto_sync_handle def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, @@ -263,26 +199,6 @@ def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, config_int_float.ncv = ncv config_int_float.tolerance = tol config_int_float.seed = seed - elif IndexType == np.int64 and ValueType == np.float32: - config_int64_float.n_components = k - config_int64_float.max_iterations = maxiter - config_int64_float.ncv = ncv - config_int64_float.tolerance = tol - config_int64_float.seed = seed - elif IndexType == np.int32 and ValueType == np.float64: - config_int_double.n_components = k - config_int_double.max_iterations = maxiter - config_int_double.ncv = ncv - config_int_double.tolerance = tol - config_int_double.seed = seed - elif IndexType == np.int64 and ValueType == np.float64: - config_int64_double.n_components = k - config_int64_double.max_iterations = maxiter - config_int64_double.ncv = ncv - config_int64_double.tolerance = tol - config_int64_double.seed = seed - - if IndexType == np.int32 and ValueType == 
np.float32: lanczos_solver( deref(h), make_device_vector_view(rows_ptr, (N + 1)), @@ -295,6 +211,11 @@ def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, eigenvectors_ptr, N, k), ) elif IndexType == np.int64 and ValueType == np.float32: + config_int64_float.n_components = k + config_int64_float.max_iterations = maxiter + config_int64_float.ncv = ncv + config_int64_float.tolerance = tol + config_int64_float.seed = seed lanczos_solver( deref(h), make_device_vector_view(rows_ptr, (N + 1)), @@ -307,6 +228,11 @@ def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, eigenvectors_ptr, N, k), ) elif IndexType == np.int32 and ValueType == np.float64: + config_int_double.n_components = k + config_int_double.max_iterations = maxiter + config_int_double.ncv = ncv + config_int_double.tolerance = tol + config_int_double.seed = seed lanczos_solver( deref(h), make_device_vector_view(rows_ptr, (N + 1)), @@ -319,6 +245,11 @@ def eigsh(A, k=6, v0=None, ncv=None, maxiter=None, eigenvectors_ptr, N, k), ) elif IndexType == np.int64 and ValueType == np.float64: + config_int64_double.n_components = k + config_int64_double.max_iterations = maxiter + config_int64_double.ncv = ncv + config_int64_double.tolerance = tol + config_int64_double.seed = seed lanczos_solver( deref(h), make_device_vector_view(rows_ptr, (N + 1)), From 8767c6a809717a5146855a87b1178ae247fb5a0a Mon Sep 17 00:00:00 2001 From: aamijar Date: Mon, 9 Sep 2024 17:33:03 +0000 Subject: [PATCH 21/23] resolving pr comments --- .../raft/sparse/solver/detail/lanczos.cuh | 74 +++++++++++++++++-- 1 file changed, 67 insertions(+), 7 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index d8499e0749..8c7b30726d 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -1661,7 +1661,16 @@ void lanczos_aux(raft::resources const& handle, kernel_clamp_down<<<1, 1, 0, stream>>>(alpha_i.data_handle(), static_cast(1e-9)); - raft::linalg::nrm2(handle, n, u.data_handle(), 1, &beta(0, i), stream); + raft::device_vector_view output = + raft::make_device_vector_view(&beta(0, i), 1); + raft::device_matrix_view input = + raft::make_device_matrix_view(u.data_handle(), 1, n); + raft::linalg::norm(handle, + input, + output, + raft::linalg::L2Norm, + raft::linalg::Apply::ALONG_ROWS, + raft::sqrt_op()); int blockSize = 256; int numBlocks = (n + blockSize - 1) / blockSize; @@ -1714,7 +1723,18 @@ auto lanczos_smallest( auto cublas_h = resource::get_cublas_handle(handle); ValueTypeT v0nrm = 0; - raft::linalg::nrm2(handle, n, v0_view.data_handle(), 1, &v0nrm, stream); + + raft::device_vector output1 = + raft::make_device_vector(handle, 1); + raft::device_matrix_view input1 = + raft::make_device_matrix_view(v0_view.data_handle(), 1, n); + raft::linalg::norm(handle, + input1, + output1.view(), + raft::linalg::L2Norm, + raft::linalg::Apply::ALONG_ROWS, + raft::sqrt_op()); + raft::copy(&v0nrm, output1.data_handle(), 1, stream); raft::device_scalar v0nrm_scalar = raft::make_device_scalar(handle, v0nrm); @@ -1795,7 +1815,18 @@ auto lanczos_smallest( raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); ValueTypeT res = 0; - raft::linalg::nrm2(handle, nEigVecs, beta_k.data_handle(), 1, &res, stream); + + raft::device_vector output = + raft::make_device_vector(handle, 1); + raft::device_matrix_view input = + raft::make_device_matrix_view(beta_k.data_handle(), 1, nEigVecs); + raft::linalg::norm(handle, + input, + 
output.view(), + raft::linalg::L2Norm, + raft::linalg::Apply::ALONG_ROWS, + raft::sqrt_op()); + raft::copy(&res, output.data_handle(), 1, stream); std::cout << "res " << res << std::endl; @@ -1848,7 +1879,18 @@ auto lanczos_smallest( auto V_0_view = raft::make_device_matrix_view(V.data_handle() + (nEigVecs * n), 1, n); ValueTypeT unrm = 0; - raft::linalg::nrm2(handle, n, u.data_handle(), 1, &unrm, stream); + + raft::device_vector output = + raft::make_device_vector(handle, 1); + raft::device_matrix_view input = + raft::make_device_matrix_view(u.data_handle(), 1, n); + raft::linalg::norm(handle, + input, + output.view(), + raft::linalg::L2Norm, + raft::linalg::Apply::ALONG_ROWS, + raft::sqrt_op()); + raft::copy(&unrm, output.data_handle(), 1, stream); raft::device_scalar unrm_scalar = raft::make_device_scalar(handle, unrm); @@ -1970,8 +2012,16 @@ auto lanczos_smallest( kernel_subtract_and_scale<<>>( u.data_handle(), temp.data_handle(), one_scalar.data_handle(), n); - raft::linalg::nrm2( - handle, n, u.data_handle(), 1, &((beta.view())(0, nEigVecs)), stream); + raft::device_vector_view output1 = + raft::make_device_vector_view(&((beta.view())(0, nEigVecs)), 1); + raft::device_matrix_view input1 = + raft::make_device_matrix_view(u.data_handle(), 1, n); + raft::linalg::norm(handle, + input1, + output1, + raft::linalg::L2Norm, + raft::linalg::Apply::ALONG_ROWS, + raft::sqrt_op()); auto V_kplus1 = raft::make_device_vector_view(&(V.view()(nEigVecs + 1, 0)), n); auto u_vector = raft::make_device_vector_view(u.data_handle(), n); @@ -2032,7 +2082,17 @@ auto lanczos_smallest( raft::linalg::axpy(handle, beta_scalar, raft::make_const_mdspan(s.view()), beta_k.view()); - raft::linalg::nrm2(handle, nEigVecs, beta_k.data_handle(), 1, &res, stream); + raft::device_vector output2 = + raft::make_device_vector(handle, 1); + raft::device_matrix_view input2 = + raft::make_device_matrix_view(beta_k.data_handle(), 1, nEigVecs); + raft::linalg::norm(handle, + input2, + output2.view(), + raft::linalg::L2Norm, + raft::linalg::Apply::ALONG_ROWS, + raft::sqrt_op()); + raft::copy(&res, output2.data_handle(), 1, stream); std::cout << "res " << res << " " << iter << std::endl; } From a3809eb78abd68c785ce861aa20f2009f210514b Mon Sep 17 00:00:00 2001 From: aamijar Date: Mon, 9 Sep 2024 17:41:56 +0000 Subject: [PATCH 22/23] resolving pr comments --- cpp/include/raft/linalg/detail/norm.cuh | 11 ----------- cpp/include/raft/linalg/norm.cuh | 7 ------- 2 files changed, 18 deletions(-) diff --git a/cpp/include/raft/linalg/detail/norm.cuh b/cpp/include/raft/linalg/detail/norm.cuh index a5a4043331..24da634575 100644 --- a/cpp/include/raft/linalg/detail/norm.cuh +++ b/cpp/include/raft/linalg/detail/norm.cuh @@ -17,8 +17,6 @@ #pragma once #include -#include -#include #include #include @@ -140,15 +138,6 @@ void colNormCaller(OutType* dots, }; } -template -void nrm2( - raft::resources const& handle, int n, const T* x, int incx, T* result, cudaStream_t stream) -{ - cublasHandle_t cublas_h = resource::get_cublas_handle(handle); - detail::cublas_device_pointer_mode pmode(cublas_h); - detail::cublasnrm2(cublas_h, n, x, incx, result, stream); -} - }; // end namespace detail }; // end namespace linalg }; // end namespace raft diff --git a/cpp/include/raft/linalg/norm.cuh b/cpp/include/raft/linalg/norm.cuh index 3bf4c9772b..4270149793 100644 --- a/cpp/include/raft/linalg/norm.cuh +++ b/cpp/include/raft/linalg/norm.cuh @@ -160,13 +160,6 @@ void norm(raft::resources const& handle, } } -template -void nrm2( - raft::resources const& handle, int 
n, const T* x, int incx, T* result, cudaStream_t stream)
-{
-  detail::nrm2(handle, n, x, incx, result, stream);
-}
-
 /** @} */

 };  // end namespace linalg

From d4b495549c2e998d7c6328ef42825f363bfd2713 Mon Sep 17 00:00:00 2001
From: aamijar
Date: Mon, 9 Sep 2024 18:14:40 +0000
Subject: [PATCH 23/23] resolving pr comments

---
 cpp/include/raft/sparse/solver/detail/lanczos.cuh | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh
index 8c7b30726d..9694a48c5c 100644
--- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh
+++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh
@@ -1564,8 +1564,6 @@ void lanczos_aux(raft::resources const& handle,

   raft::copy(v.data_handle(), &(V(start_idx, 0)), n, stream);

-  std::cout << start_idx << " " << end_idx << std::endl;
-
   auto cusparse_h = resource::get_cusparse_handle(handle);
   cusparseSpMatDescr_t cusparse_A;
   raft::sparse::detail::cusparsecreatecsr(&cusparse_A,
@@ -1708,8 +1706,6 @@ auto lanczos_smallest(
   int ncv     = restartIter;
   auto stream = resource::get_cuda_stream(handle);

-  std::cout << std::fixed << std::setprecision(7);  // Set precision to 10 decimal places
-
   raft::device_matrix V =
     raft::make_device_matrix(handle, ncv, n);
   raft::device_matrix_view V_0_view =
@@ -1828,8 +1824,6 @@ auto lanczos_smallest(
     raft::sqrt_op());
   raft::copy(&res, output.data_handle(), 1, stream);

-  std::cout << "res " << res << std::endl;
-
   auto uu  = raft::make_device_matrix(handle, 0, nEigVecs);
   int iter = ncv;
   while (res > tol && iter < maxIter) {
@@ -2094,7 +2088,7 @@ auto lanczos_smallest(
       raft::sqrt_op());
     raft::copy(&res, output2.data_handle(), 1, stream);

-    std::cout << "res " << res << " " << iter << std::endl;
+    RAFT_LOG_TRACE("Iteration %d: residual %f (tolerance %f)", iter, res, tol);
  }

  raft::copy(eigVals_dev, eigenvalues_k.data_handle(), nEigVecs, stream);
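
For reference, a minimal caller of the mdspan-based entry point added in [PATCH 18/23] could look like the sketch below. The template arguments, the two-parameter lanczos_solver_config, the field order used for aggregate initialization, and the col_major eigenvector layout are inferred from the Cython declarations above, so treat them as assumptions about this revision rather than a settled public signature; the ncv choice simply mirrors the common min(n, max(2*k + 1, 20)) heuristic.

#include <raft/core/device_mdarray.hpp>
#include <raft/core/resources.hpp>
#include <raft/sparse/solver/lanczos.cuh>

#include <algorithm>
#include <cstdint>

// Compute the k smallest eigenpairs of a symmetric CSR matrix described by
// (rows, cols, vals): rows holds n + 1 offsets, cols/vals hold nnz entries.
void smallest_eigenpairs(raft::resources const& handle,
                         raft::device_vector_view<int, uint32_t> rows,
                         raft::device_vector_view<int, uint32_t> cols,
                         raft::device_vector_view<float, uint32_t> vals,
                         raft::device_vector_view<float, uint32_t> v0)
{
  int n = static_cast<int>(rows.extent(0)) - 1;
  int k = 4;

  // Field order follows the struct as declared in the Cython bindings:
  // n_components, max_iterations, ncv, tolerance, seed.
  raft::sparse::solver::lanczos_solver_config<int, float> config{
    k, 10000, std::min(n, std::max(2 * k + 1, 20)), 1e-9f, 42};

  auto eigenvalues  = raft::make_device_vector<float, uint32_t>(handle, k);
  auto eigenvectors =
    raft::make_device_matrix<float, uint32_t, raft::col_major>(handle, n, k);

  raft::sparse::solver::lanczos_compute_smallest_eigenvectors<int, float>(
    handle, rows, cols, vals, config, v0, eigenvalues.view(), eigenvectors.view());
}

The Python eigsh wrapper in this series forwards to the same lanczos_solver runtime instantiations, so the config fields line up one-to-one with eigsh's k, maxiter, ncv, tol, and seed arguments.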