diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h index 99ce1105fef397..cc3e634d672dd6 100644 --- a/be/src/vec/common/columns_hashing.h +++ b/be/src/vec/common/columns_hashing.h @@ -257,6 +257,37 @@ struct HashMethodSingleLowNullableColumn : public SingleColumnMethod { return EmplaceResult(inserted); } + template + ALWAYS_INLINE typename std::enable_if_t& lazy_emplace_key( + Data& data, size_t row, Arena& pool, Func&& f, CreatorForNull&& null_creator) { + if (key_columns[0]->is_null_at(row)) { + bool has_null_key = data.has_null_key_data(); + data.has_null_key_data() = true; + if (!has_null_key) std::forward(null_creator)(data.get_null_key_data()); + return data.get_null_key_data(); + } + auto key_holder = Base::get_key_holder(row, pool); + typename Data::LookupResult it; + data.lazy_emplace(key_holder, it, std::forward(f)); + return *lookup_result_get_mapped(it); + } + + template + ALWAYS_INLINE typename std::enable_if_t& lazy_emplace_key( + Data& data, size_t row, Arena& pool, size_t hash_value, Func&& f, + CreatorForNull&& null_creator) { + if (key_columns[0]->is_null_at(row)) { + bool has_null_key = data.has_null_key_data(); + data.has_null_key_data() = true; + if (!has_null_key) std::forward(null_creator)(data.get_null_key_data()); + return data.get_null_key_data(); + } + auto key_holder = Base::get_key_holder(row, pool); + typename Data::LookupResult it; + data.lazy_emplace(key_holder, it, hash_value, std::forward(f)); + return *lookup_result_get_mapped(it); + } + template ALWAYS_INLINE FindResult find_key(Data& data, size_t row, Arena& pool) { if (key_columns[0]->is_null_at(row)) { @@ -276,5 +307,16 @@ struct HashMethodSingleLowNullableColumn : public SingleColumnMethod { } }; +template +struct IsSingleNullableColumnMethod { + static constexpr bool value = false; +}; + +template +struct IsSingleNullableColumnMethod< + HashMethodSingleLowNullableColumn> { + static constexpr bool value = true; +}; + } // namespace ColumnsHashing } // namespace doris::vectorized diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h index 5b5f13b86a905a..7bf4fd31327ad2 100644 --- a/be/src/vec/common/columns_hashing_impl.h +++ b/be/src/vec/common/columns_hashing_impl.h @@ -140,6 +140,22 @@ class HashMethodBase { return emplaceImpl(key_holder, hash_value, data); } + template + ALWAYS_INLINE typename std::enable_if_t& lazy_emplace_key(Data& data, + size_t row, + Arena& pool, + Func&& f) { + auto key_holder = static_cast(*this).get_key_holder(row, pool); + return lazy_emplace_impl(key_holder, data, std::forward(f)); + } + + template + ALWAYS_INLINE typename std::enable_if_t& lazy_emplace_key( + Data& data, size_t hash_value, size_t row, Arena& pool, Func&& f) { + auto key_holder = static_cast(*this).get_key_holder(row, pool); + return lazy_emplace_impl(key_holder, hash_value, data, std::forward(f)); + } + template ALWAYS_INLINE FindResult find_key(Data& data, size_t row, Arena& pool) { auto key_holder = static_cast(*this).get_key_holder(row, pool); @@ -264,6 +280,23 @@ class HashMethodBase { return EmplaceResult(inserted); } + template + ALWAYS_INLINE typename std::enable_if_t& lazy_emplace_impl( + KeyHolder& key_holder, Data& data, Func&& f) { + typename Data::LookupResult it; + data.lazy_emplace(key_holder, it, std::forward(f)); + return *lookup_result_get_mapped(it); + } + + template + ALWAYS_INLINE typename std::enable_if_t& lazy_emplace_impl( + KeyHolder& key_holder, size_t hash_value, Data& data, Func&& f) { + typename Data::LookupResult it; + data.lazy_emplace(key_holder, it, hash_value, std::forward(f)); + + return *lookup_result_get_mapped(it); + } + template ALWAYS_INLINE FindResult find_key_impl(Key key, Data& data) { if constexpr (Cache::consecutive_keys_optimization) { diff --git a/be/src/vec/common/hash_table/fixed_hash_map.h b/be/src/vec/common/hash_table/fixed_hash_map.h index 1985885dda2745..164411550c1fb7 100644 --- a/be/src/vec/common/hash_table/fixed_hash_map.h +++ b/be/src/vec/common/hash_table/fixed_hash_map.h @@ -78,6 +78,7 @@ struct FixedHashMapImplicitZeroCell { FixedHashMapImplicitZeroCell() {} FixedHashMapImplicitZeroCell(const Key&, const State&) {} + FixedHashMapImplicitZeroCell(const Key&, const Mapped& mapped_) : mapped(mapped_) {} FixedHashMapImplicitZeroCell(const value_type& value_, const State&) : mapped(value_.second) {} const VoidKey get_first() const { return {}; } diff --git a/be/src/vec/common/hash_table/fixed_hash_table.h b/be/src/vec/common/hash_table/fixed_hash_table.h index 338ae03905b64d..a909403d3b6fde 100644 --- a/be/src/vec/common/hash_table/fixed_hash_table.h +++ b/be/src/vec/common/hash_table/fixed_hash_table.h @@ -269,6 +269,31 @@ class FixedHashTable : private boost::noncopyable, this->increase_size(); } + class Constructor { + public: + friend class FixedHashTable; + template + void operator()(Args&&... args) const { + new (_cell) Cell(std::forward(args)...); + } + + private: + Constructor(Cell* cell) : _cell(cell) {} + Cell* _cell; + }; + + template + void ALWAYS_INLINE lazy_emplace(const Key& x, LookupResult& it, Func&& f) { + it = &buf[x]; + + if (!buf[x].is_zero(*this)) { + return; + } + + f(Constructor(&buf[x]), x); + this->increase_size(); + } + std::pair ALWAYS_INLINE insert(const value_type& x) { std::pair res; emplace(Cell::get_key(x), res.first, res.second); diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h index 25e1b74c510755..118b87087d9714 100644 --- a/be/src/vec/common/hash_table/hash_map.h +++ b/be/src/vec/common/hash_table/hash_map.h @@ -59,6 +59,7 @@ struct HashMapCell { HashMapCell() {} HashMapCell(const Key& key_, const State&) : value(key_, NoInitTag()) {} + HashMapCell(const Key& key_, const Mapped& mapped_) : value(key_, mapped_) {} HashMapCell(const value_type& value_, const State&) : value(value_) {} const Key& get_first() const { return value.first; } diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h index ce6407f072770f..a59cf972ff0d86 100644 --- a/be/src/vec/common/hash_table/hash_table.h +++ b/be/src/vec/common/hash_table/hash_table.h @@ -765,6 +765,27 @@ class HashTable : private boost::noncopyable, return false; } + template + bool ALWAYS_INLINE lazy_emplace_if_zero(const Key& x, LookupResult& it, size_t hash_value, + Func&& f) { + /// If it is claimed that the zero key can not be inserted into the table. + if (!Cell::need_zero_value_storage) return false; + + if (Cell::is_zero(x, *this)) { + it = this->zero_value(); + if (!this->get_has_zero()) { + ++m_size; + this->set_get_has_zero(); + std::forward(f)(Constructor(it), x); + this->zero_value()->set_hash(hash_value); + } + + return true; + } + + return false; + } + template void ALWAYS_INLINE emplace_non_zero_impl(size_t place_value, KeyHolder&& key_holder, LookupResult& it, bool& inserted, size_t hash_value) { @@ -804,6 +825,43 @@ class HashTable : private boost::noncopyable, } } + template + void ALWAYS_INLINE lazy_emplace_non_zero_impl(size_t place_value, KeyHolder&& key_holder, + LookupResult& it, size_t hash_value, Func&& f) { + it = &buf[place_value]; + + if (!buf[place_value].is_zero(*this)) { + key_holder_discard_key(key_holder); + return; + } + + key_holder_persist_key(key_holder); + const auto& key = key_holder_get_key(key_holder); + + f(Constructor(&buf[place_value]), key); + buf[place_value].set_hash(hash_value); + ++m_size; + + if (UNLIKELY(grower.overflow(m_size))) { + try { + resize(); + } catch (...) { + /** If we have not resized successfully, then there will be problems. + * There remains a key, but uninitialized mapped-value, + * which, perhaps, can not even be called a destructor. + */ + --m_size; + buf[place_value].set_zero(); + throw; + } + + // The hash table was rehashed, so we have to re-find the key. + size_t new_place = find_cell(key, hash_value, grower.place(hash_value)); + assert(!buf[new_place].is_zero(*this)); + it = &buf[new_place]; + } + } + /// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter. template void ALWAYS_INLINE emplace_non_zero(KeyHolder&& key_holder, LookupResult& it, bool& inserted, @@ -813,6 +871,14 @@ class HashTable : private boost::noncopyable, emplace_non_zero_impl(place_value, key_holder, it, inserted, hash_value); } + template + void ALWAYS_INLINE lazy_emplace_non_zero(KeyHolder&& key_holder, LookupResult& it, + size_t hash_value, Func&& f) { + const auto& key = key_holder_get_key(key_holder); + size_t place_value = find_cell(key, hash_value, grower.place(hash_value)); + lazy_emplace_non_zero_impl(place_value, key_holder, it, hash_value, std::forward(f)); + } + public: void expanse_for_add_elem(size_t num_elem) { if (add_elem_size_overflow(num_elem)) { @@ -847,6 +913,19 @@ class HashTable : private boost::noncopyable, reinsert(*it.get_ptr(), hash_value); } + class Constructor { + public: + friend class HashTable; + template + void operator()(Args&&... args) const { + new (_cell) Cell(std::forward(args)...); + } + + private: + Constructor(Cell* cell) : _cell(cell) {} + Cell* _cell; + }; + /** Insert the key. * Return values: * 'it' -- a LookupResult pointing to the corresponding key/mapped pair. @@ -877,6 +956,20 @@ class HashTable : private boost::noncopyable, emplace_non_zero(key_holder, it, inserted, hash_value); } + template + void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) { + const auto& key = key_holder_get_key(key_holder); + lazy_emplace(key_holder, it, hash(key), std::forward(f)); + } + + template + void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value, + Func&& f) { + const auto& key = key_holder_get_key(key_holder); + if (!lazy_emplace_if_zero(key, it, hash_value, std::forward(f))) + lazy_emplace_non_zero(key_holder, it, hash_value, std::forward(f)); + } + /// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet. void ALWAYS_INLINE insert_unique_non_zero(const Cell* cell, size_t hash_value) { size_t place_value = find_empty_cell(grower.place(hash_value)); diff --git a/be/src/vec/common/hash_table/hash_table_utils.h b/be/src/vec/common/hash_table/hash_table_utils.h index e437f070996653..0302b72f3ca56e 100644 --- a/be/src/vec/common/hash_table/hash_table_utils.h +++ b/be/src/vec/common/hash_table/hash_table_utils.h @@ -22,4 +22,5 @@ template struct HashTableTraits { static constexpr bool is_phmap = false; static constexpr bool is_parallel_phmap = false; + static constexpr bool is_string_hash_table = false; }; diff --git a/be/src/vec/common/hash_table/ph_hash_map.h b/be/src/vec/common/hash_table/ph_hash_map.h index 3a56ce7988af70..66596bb333ca98 100644 --- a/be/src/vec/common/hash_table/ph_hash_map.h +++ b/be/src/vec/common/hash_table/ph_hash_map.h @@ -118,6 +118,16 @@ class PHHashMap : private boost::noncopyable { it = &*it_; } + template + void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) { + const auto& key = key_holder_get_key(key_holder); + auto it_ = _hash_map.lazy_emplace(key, [&](const auto& ctor) { + key_holder_persist_key(key_holder); + f(ctor, key); + }); + it = &*it_; + } + template void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value, bool& inserted) { @@ -140,6 +150,25 @@ class PHHashMap : private boost::noncopyable { } } + template + void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value, + Func&& f) { + const auto& key = key_holder_get_key(key_holder); + if constexpr (use_parallel) { + auto it_ = _hash_map.lazy_emplace_with_hash(hash_value, key, [&](const auto& ctor) { + key_holder_persist_key(key_holder); + f(ctor, key); + }); + it = &*it_; + } else { + auto it_ = _hash_map.lazy_emplace_with_hash(key, hash_value, [&](const auto& ctor) { + key_holder_persist_key(key_holder); + f(ctor, key); + }); + it = &*it_; + } + } + template LookupResult ALWAYS_INLINE find(KeyHolder&& key_holder) { const auto& key = key_holder_get_key(key_holder); @@ -197,4 +226,5 @@ template struct HashTableTraits> { static constexpr bool is_phmap = true; static constexpr bool is_parallel_phmap = use_parallel; + static constexpr bool is_string_hash_table = false; }; diff --git a/be/src/vec/common/hash_table/string_hash_map.h b/be/src/vec/common/hash_table/string_hash_map.h index 4e2ca786bb00ae..344f468dec1237 100644 --- a/be/src/vec/common/hash_table/string_hash_map.h +++ b/be/src/vec/common/hash_table/string_hash_map.h @@ -209,3 +209,17 @@ class StringHashMap : public StringHashTable +struct HashTableTraits> { + static constexpr bool is_phmap = false; + static constexpr bool is_parallel_phmap = false; + static constexpr bool is_string_hash_table = true; +}; + +template