From af9b92fa827561996a6d184b593b49a74660f13f Mon Sep 17 00:00:00 2001 From: hui lai <1353307710@qq.com> Date: Mon, 2 Sep 2024 09:50:18 +0800 Subject: [PATCH] [fix](move-memtable) do not execute close if create rowset failed when loading MOW table (#40105) A core dump happened when loading data into a MOW table: ``` Check failure stack trace: *** @ 0x55fae437d246 google::LogMessage::SendToLog() @ 0x55fae4379c90 google::LogMessage::Flush() @ 0x55fae437da89 google::LogMessageFatal::~LogMessageFatal() @ 0x55faacf26bbf doris::BaseTablet::check_delete_bitmap_correctness() @ 0x55fab05049ef doris::RowsetBuilder::commit_txn() @ 0x55fab09026e8 doris::LoadStreamWriter::close() @ 0x55fab089eff7 std::_Function_handler<>::_M_invoke() @ 0x55fab0d14d7c doris::WorkThreadPool<>::work_thread() @ 0x55fae76ae6f0 execute_native_thread_routine @ 0x7fa32ea45ac3 (unknown) @ 0x7fa32ead7850 (unknown) @ (nil) (unknown) Query id: a21981d5c8ef4113-84df9a5a8680e004 *** is nereids: 0 *** tablet id: 0 *** Aborted at 1724668499 (unix time) try "date -d @1724668499" if you are using GNU date *** Current BE git commitID: 2f848737c1 *** SIGABRT unknown detail explain (@0x20db) received by PID 8411 (TID 9837 OR 0x7f9e42cfe640) from PID 8411; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421 1# 0x00007FA32E9F3520 in /lib/x86_64-linux-gnu/libc.so.6 2# pthread_kill at ./nptl/pthread_kill.c:89 3# raise at ../sysdeps/posix/raise.c:27 4# abort at ./stdlib/abort.c:81 5# 0x000055FAE4387B1D in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 6# 0x000055FAE437A15A in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 7# google::LogMessage::SendToLog() in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 8# google::LogMessage::Flush() in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 9# google::LogMessageFatal::~LogMessageFatal() in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 10# 
doris::BaseTablet::check_delete_bitmap_correctness(std::shared_ptr, long, long, std::unordered_set, std::equal_to, std::allocator > const&, std::vector, std::allocator > >*) at /home/zcp/repo_center/doris_master/doris/be/src/olap/base_tablet.cpp:1152 11# doris::RowsetBuilder::commit_txn() at /home/zcp/repo_center/doris_master/doris/be/src/olap/rowset_builder.cpp:316 12# doris::LoadStreamWriter::close() at /home/zcp/repo_center/doris_master/doris/be/src/runtime/load_stream_writer.cpp:311 13# std::_Function_handler::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291 14# doris::WorkThreadPool::work_thread(int) at /home/zcp/repo_center/doris_master/doris/be/src/util/work_thread_pool.hpp:159 15# execute_native_thread_routine at ../../../../../libstdc+-v3/src/c+11/thread.cc:84 16# start_thread at ./nptl/pthread_create.c:442 17# 0x00007FA32EAD7850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83 ``` If creating the rowset fails, `calc_delete_bitmap_task` can still be executed: ``` add segment failed load_id=5649413b98976f0d-a105b42749f561b0, txn_id=2, tablet_id=10088, status=[INTERNAL_ERROR]create row set failed ... submit calc delete bitmap task to executor, tablet_id: 10088, txn_id: 2 ``` To solve this problem, this PR skips close when creating the rowset has failed while loading a MOW table, so that `submit_calc_delete_bitmap_task` is not executed. 
--- be/src/runtime/load_stream.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/be/src/runtime/load_stream.cpp b/be/src/runtime/load_stream.cpp index 61152a034f7433..a08d175216642a 100644 --- a/be/src/runtime/load_stream.cpp +++ b/be/src/runtime/load_stream.cpp @@ -272,6 +272,13 @@ Status TabletStream::close() { return _status; } + // it is necessary to check status after wait_func, + // for create_rowset could fail during add_segment when loading to MOW table, + // in this case, should skip close to avoid submit_calc_delete_bitmap_task which could cause coredump. + if (!_status.ok()) { + return _status; + } + auto close_func = [this, &mu, &cv]() { signal::set_signal_task_id(_load_id); auto st = _load_stream_writer->close();