From 843cc86db5b1b94bbeff322a78420c345bc0560a Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Sun, 11 Aug 2024 06:32:23 +0800 Subject: [PATCH] [fix](ub) undefined behavior in FixedContainer (#39191) ## Proposed changes Undefined behavior occurs if there is a null value in the list. ``` /root/doris/be/src/vec/common/string_ref.h:271:54: runtime error: null pointer passed as argument 2, which is declared to never be null /var/local/ldb-toolchain/bin/../usr/include/string.h:64:33: note: nonnull attribute specified here #0 0x5616d072245d in doris::StringRef::eq(doris::StringRef const&) const /root/doris/be/src/vec/common/string_ref.h:271:41 #1 0x5616d072245d in doris::StringRef::operator==(doris::StringRef const&) const /root/doris/be/src/vec/common/string_ref.h:274:60 #2 0x5616d072245d in doris::FixedContainer::find(doris::StringRef const&) const /root/doris/be/src/exprs/hybrid_set.h:76:36 #3 0x5616d072245d in void doris::StringValueSet>::_find_batch(doris::vectorized::IColumn const&, unsigned long, doris::vectorized::PODArray, 16ul, 15ul> const*, doris::vectorized::PODArray, 16ul, 15ul>&) /root/doris/be/src/exprs/hybrid_set.h:688:63 #4 0x5616d0747857 in doris::vectorized::FunctionIn::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /root/doris/be/src/vec/functions/in.h:170:21 #5 0x5616c741fa3a in doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /root/doris/be/src/vec/functions/function.h:462:26 #6 0x5616cbb5b650 in doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp #7 0x5616cbb4e14e in doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:244:12 #8 0x5616cbb4e3c2 in doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:250:12 #9 0x5616c741cd68 in doris::vectorized::IFunctionBase::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.h:190:19 #10 0x5616c74cf712 in doris::vectorized::VInPredicate::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vin_predicate.cpp:130:5 #11 0x5616c740d5c0 in doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*, std::vector>&) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:183:9 #12 0x5616c740ecf5 in doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:215:12 #13 0x5616c7462e24 in doris::vectorized::VCompoundPred::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vcompound_pred.h:127:38 #14 0x5616c74bccec in doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vexpr_context.cpp:54:5 #15 0x5616c74c1dcc in doris::vectorized::VExprContext::execute_conjuncts(std::vector, std::allocator>> const&, std::vector, 16ul, 15ul>, std::allocator, 16ul, 15ul>>> const*, bool, doris::vectorized::Block*, doris::vectorized::PODArray, 16ul, 15ul>, bool) /root/doris/be/src/vec/exprs/vexpr_context.cpp:169:9 #16 0x5616c74c5108 in doris::vectorized::VExprContext::execute_conjuncts_and_filter_block(std::vector, std::allocator>> const&, doris::vectorized::Block*, std::vector>&, int, doris::vectorized::PODArray, 16ul, 15ul>&) /root/doris/be/src/vec/exprs/vexpr_context.cpp:322:5 #17 0x5616ad8a7f1a in doris::segment_v2::SegmentIterator::_execute_common_expr(unsigned short*, unsigned short&, doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2680:5 #18 0x5616ad89e86e in doris::segment_v2::SegmentIterator::_next_batch_internal(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2582:25 #19 0x5616ad892f5c in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)::$_0::operator()() const /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2315:9 #20 0x5616ad892f5c in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2314:19 #21 0x5616ad6dd9cc in doris::segment_v2::LazyInitSegmentIterator::next_batch(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.h:44:33 #22 0x5616ad269d67 in doris::BetaRowsetReader::next_block(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/beta_rowset_reader.cpp:380:29 #23 0x5616de6de110 in doris::vectorized::VCollectIterator::Level0Iterator::_refresh() /root/doris/be/src/vec/olap/vcollect_iterator.h #24 0x5616de6c967f in doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:514:24 #25 0x5616de6ca8a6 in doris::vectorized::VCollectIterator::Level0Iterator::ensure_first_row_ref() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:493:14 #26 0x5616de6d7008 in doris::vectorized::VCollectIterator::Level1Iterator::ensure_first_row_ref() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:692:27 #27 0x5616de6bd200 in doris::vectorized::VCollectIterator::build_heap(std::vector, std::allocator>>&) /root/doris/be/src/vec/olap/vcollect_iterator.cpp:186:9 #28 0x5616de651b6c in doris::vectorized::BlockReader::_init_collect_iter(doris::TabletReader::ReaderParams const&) /root/doris/be/src/vec/olap/block_reader.cpp:157:5 #29 0x5616de65526f in doris::vectorized::BlockReader::init(doris::TabletReader::ReaderParams const&) /root/doris/be/src/vec/olap/block_reader.cpp:229:19 #30 0x5616e175a0f9 in doris::vectorized::NewOlapScanner::open(doris::RuntimeState*) /root/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:237:32 #31 0x5616c736ad34 in doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr, std::shared_ptr) /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:236:5 #32 0x5616c736f05e in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:176:21 #33 0x5616c736f05e in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:175:31 #34 0x5616c736f05e in void std::_invoke_impl, std::shared_ptr)::$_1::operator()() const::'lambda'()&>(std::_invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14 #35 0x5616c736f05e in std::enable_if, std::shared_ptr)::$1::operator()() const::'lambda'()&>, void>::type std::_invoke_r, std::shared_ptr)::$_1::operator()() const::'lambda'()&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2 #36 0x5616c736f05e in std::_Function_handler, std::shared_ptr)::$_1::operator()() const::'lambda'()>::_M_invoke(std::_Any_data const&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9 #37 0x5616aeed6a3b in doris::ThreadPool::dispatch_thread() /root/doris/be/src/util/threadpool.cpp:543:24 #38 0x5616aeeae4f7 in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5 #39 0x7f7e663e3ac2 in start_thread nptl/pthread_create.c:442:8 #40 0x7f7e6647584f misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /root/doris/be/src/vec/common/string_ref.h:271:54 in ``` --- be/src/exprs/hybrid_set.h | 35 +++++++++++++++++++ be/src/vec/functions/in.h | 2 +- .../data/nereids_syntax_p0/inpredicate.out | 9 +++++ .../nereids_syntax_p0/inpredicate.groovy | 16 +++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h index b75cc81ebf1f14..f0977a652b1cbe 100644 --- a/be/src/exprs/hybrid_set.h +++ b/be/src/exprs/hybrid_set.h @@ -17,7 +17,13 @@ #pragma once +#include + +#include + +#include "common/exception.h" #include "common/object_pool.h" +#include "common/status.h" #include "exprs/runtime_filter.h" #include "runtime/decimalv2_value.h" #include "runtime/define_primitive_type.h" @@ -60,8 +66,16 @@ class FixedContainer { } } + void check_size() { + if (N != _size) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "invalid size of FixedContainer<{}>: {}", N, _size); + } + } + // Use '|' instead of '||' has better performance by test. ALWAYS_INLINE bool find(const T& value) const { + DCHECK_EQ(N, _size); if constexpr (N == 0) { return false; } @@ -144,6 +158,12 @@ class FixedContainer { size_t _size {}; }; +template +struct IsFixedContainer : std::false_type {}; + +template +struct IsFixedContainer> : std::true_type {}; + /** * Dynamic Container uses phmap::flat_hash_set. * @tparam T Element Type @@ -354,6 +374,11 @@ class HybridSet : public HybridSetBase { if constexpr (is_nullable) { null_map_data = null_map->data(); } + + if constexpr (IsFixedContainer::value) { + _set.check_size(); + } + auto* __restrict result_data = results.data(); for (size_t i = 0; i < rows; ++i) { if constexpr (!is_nullable && !is_negative) { @@ -507,6 +532,11 @@ class StringSet : public HybridSetBase { if constexpr (is_nullable) { null_map_data = null_map->data(); } + + if constexpr (IsFixedContainer::value) { + _set.check_size(); + } + auto* __restrict result_data = results.data(); for (size_t i = 0; i < rows; ++i) { const auto& string_data = col.get_data_at(i).to_string(); @@ -675,6 +705,11 @@ class StringValueSet : public HybridSetBase { if constexpr (is_nullable) { null_map_data = null_map->data(); } + + if constexpr (IsFixedContainer::value) { + _set.check_size(); + } + auto* __restrict result_data = results.data(); for (size_t i = 0; i < rows; ++i) { uint32_t len = offset[i] - offset[i - 1]; diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index b25ad8eeb67e06..9b5c5bb023aee2 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -114,7 +114,7 @@ class FunctionIn : public IFunction { context->get_arg_type(0)->type == PrimitiveType::TYPE_VARCHAR || context->get_arg_type(0)->type == PrimitiveType::TYPE_STRING) { // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly - state->hybrid_set.reset(create_string_value_set((size_t)(context->get_num_args() - 1))); + state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context))); } else { state->hybrid_set.reset( create_set(context->get_arg_type(0)->type, get_size_with_out_null(context))); diff --git a/regression-test/data/nereids_syntax_p0/inpredicate.out b/regression-test/data/nereids_syntax_p0/inpredicate.out index cee03178b5cba6..ac6219c69cee60 100644 --- a/regression-test/data/nereids_syntax_p0/inpredicate.out +++ b/regression-test/data/nereids_syntax_p0/inpredicate.out @@ -31,3 +31,12 @@ 29 Supplier#000000029 VVSymB3fbwaN ARGENTINA4 ARGENTINA AMERICA 11-773-203-7342 9 Supplier#000000009 ,gJ6K2MKveYxQT IRAN 6 IRAN MIDDLE EAST 20-338-906-3675 +-- !in_predicate_11 -- +15 Supplier#000000015 DF35PepL5saAK INDIA 0 INDIA ASIA 18-687-542-7601 + +-- !in_predicate_12 -- + +-- !in_predicate_13 -- + +-- !in_predicate_14 -- + diff --git a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy index 3cdf096519c4fa..bf4ec9787f9e07 100644 --- a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy +++ b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy @@ -61,5 +61,21 @@ suite("inpredicate") { order_qt_in_predicate_10 """ SELECT * FROM supplier WHERE s_suppkey not in (15); """ + + order_qt_in_predicate_11 """ + SELECT * FROM supplier WHERE s_suppkey in (15, null); + """ + + order_qt_in_predicate_12 """ + SELECT * FROM supplier WHERE s_suppkey not in (15, null); + """ + + order_qt_in_predicate_13 """ + SELECT * FROM supplier WHERE s_nation in ('PERU', 'ETHIOPIA', null); + """ + + order_qt_in_predicate_14 """ + SELECT * FROM supplier WHERE s_nation not in ('PERU', 'ETHIOPIA', null); + """ }