crash if using cuda executor for reorder #1514

uboats · 2023-12-18T12:04:28Z

I had a crash when choosing cuda as the executor. The matrix (CSR format) and the rhs/solution vectors are built from std::vector data.

It crashed at "reordering = gko::experimental::reorder::Amd::build().on(g_exec)->generate(matrix);"
It also crashed if "reordering = gko::experimental::reorder::Amd::build().on(g_exec)->generate(matrix->transpose());"

============================================
Here's example code:
// values, col_idx, and row_ptrs are std::vectors
const auto g_exec = exec_map.at("cuda")(); // throws if not valid

// executor used by the application

// initialize matrix and vectors
auto matrix_host = mtx::create(g_exec->get_master(), gko::dim<2>(m_size),
		                  val_array::view(g_exec->get_master(), m_nnz, values.data()),
                          idx_array::view(g_exec->get_master(), m_nnz, col_idxs.data()),
                          idx_array::view(g_exec->get_master(), m_size + 1, row_ptrs.data()));
auto matrix = gko::share(gko::clone(g_exec, matrix_host));

auto b_host = vec::create(g_exec->get_master(), gko::dim<2>(m_size, 1));

auto x_host = vec::create(g_exec->get_master(), gko::dim<2>(m_size, 1));

for (IndexType ii = 0; ii < m_size; ii++) {
    b_host->at(ii, 0) = res[ii];
    x_host->at(ii, 0) = u[ii];
}

auto b = gko::clone(g_exec, b_host);
auto x = gko::clone(g_exec, x_host);

std::shared_ptr<gko::matrix::Permutation<IndexType>> reordering;
reordering = gko::experimental::reorder::Amd<IndexType>::build().on(g_exec)->generate(matrix); //

std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>> mat_reorder;
std::unique_ptr<gko::matrix::Dense<>, std::default_delete<gko::matrix::Dense<> > > b_reorder, x_reorder;

{
    mat_reorder = gko::share(matrix->permute(reordering));
    b_reorder   = b->permute(reordering, gko::matrix::permute_mode::rows);
    x_reorder   = x->permute(reordering, gko::matrix::permute_mode::rows);
}

std::shared_ptr<gko::LinOpFactory> par_ilu_fact;
par_ilu_fact = gko::factorization::ParIlut<ValueType, IndexType>::build()
        .with_fill_in_limit(limit)
        .on(g_exec);

auto par_ilu = gko::share(par_ilu_fact->generate(clone(g_exec, mat_reorder)));

auto ilu_pre_factory =
    gko::preconditioner::Ilu<gko::solver::LowerTrs<ValueType, IndexType>,
                             gko::solver::UpperTrs<ValueType, IndexType>,
                             false>::build()
        .on(g_exec);

// Use incomplete factors to generate ILU preconditioner
auto ilu_preconditioner = gko::share(ilu_pre_factory->generate(par_ilu));
........

============================================
Below is the stack:

[0] from 0x000000000de0e177 in gko::experimental::reorder::suitesparse_wrapper::amd_2(int, int*, int*, int*, int, int, int*, int*, int*, int*, int*, int*, int*, double*, double*)
[1] from 0x000000000dc9a390 in gko::experimental::reorder::suitesparse_wrapper::amd_reorder(int, int*, int*, int*, int, int*, int*, int*, int*, int*, int*, int*)
[2] from 0x000000000dca17a9 in gko::detail::RegisteredOperation<gko::experimental::reorder::suitesparse_wrapper::make_amd_reorder<int, int*, int*, int*, int, int* const&, int* const&, int* const&, int* const&, int* const&, int* const&, int* const&>(int&&, int*&&, int*&&, int*&&, int&&, int* const&, int* const&, int* const&, int* const&, int* const&, int* const&, int* const&)::{lambda(auto:1)#1}>::run(std::shared_ptr<gko::CudaExecutor const>) const
[3] from 0x000000000e8f29f0 in gko::detail::ExecutorBase<gko::CudaExecutor>::run(gko::Operation const&) const
[4] from 0x000000000dca41dd in gko::experimental::reorder::Amd::generate_impl(std::shared_ptr<gko::LinOp const>) const
[5] from 0x0000000002b0ac1c in gko::AbstractFactory<gko::LinOp, std::shared_ptr<gko::LinOp const> >::generate<std::shared_ptr<gko::LinOp const>&>(std::shared_ptr<gko::LinOp const>&) const+102 at /tools/ginkgo/ginkgo//include/ginkgo/core/base/abstract_factory.hpp:69
[6] from 0x0000000002b06551 in gko::LinOpFactory::generate(std::shared_ptr<gko::LinOp const>) const+203 at /tools/ginkgo/ginkgo//include/ginkgo/core/base/lin_op.hpp:397
[7] from 0x000000000dca3498 in gko::experimental::reorder::Amd::generate(std::shared_ptr<gko::LinOp const>) const

The text was updated successfully, but these errors were encountered:

MarcelKoch · 2023-12-19T08:36:43Z

As part of our testing, we also test AMD with the cuda executor. You can check these tests for yourself by calling

cmake --build . -t test_reorder_amd_cuda
ctest -R "amd_cuda"

in your build directory with the cmake option GINKGO_BUILD_TESTS=ON. Please check if these tests already fail for you.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

crash if using cuda executor for reorder #1514

crash if using cuda executor for reorder #1514

uboats commented Dec 18, 2023 •

edited

Loading

MarcelKoch commented Dec 19, 2023

crash if using cuda executor for reorder #1514

crash if using cuda executor for reorder #1514

Comments

uboats commented Dec 18, 2023 • edited Loading

MarcelKoch commented Dec 19, 2023

uboats commented Dec 18, 2023 •

edited

Loading