Skip to content

Commit

Permalink
[opt](MultiCast) Avoid copying while holding a lock (#38348)
Browse files Browse the repository at this point in the history
pick #37462
The difference is quite large, so it can't be directly picked.
  • Loading branch information
Mryange authored Jul 29, 2024
1 parent aedec6e commit 535ae2c
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 16 deletions.
62 changes: 47 additions & 15 deletions be/src/pipeline/exec/multi_cast_data_streamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,41 @@

namespace doris::pipeline {

MultiCastBlock::MultiCastBlock(vectorized::Block* block, int used_count, size_t mem_size)
: _used_count(used_count), _mem_size(mem_size) {
MultiCastBlock::MultiCastBlock(vectorized::Block* block, int used_count, int un_finish_copy,
size_t mem_size)
: _used_count(used_count), _un_finish_copy(un_finish_copy), _mem_size(mem_size) {
_block = vectorized::Block::create_unique(block->get_columns_with_type_and_name());
block->clear();
}

Status MultiCastDataStreamer::pull(int sender_idx, doris::vectorized::Block* block, bool* eos) {
std::lock_guard l(_mutex);
auto& pos_to_pull = _sender_pos_to_read[sender_idx];
if (pos_to_pull != _multi_cast_blocks.end()) {
if (pos_to_pull->_used_count == 1) {
DCHECK(pos_to_pull == _multi_cast_blocks.begin());
pos_to_pull->_block->swap(*block);
int* un_finish_copy = nullptr;
int use_count = 0;
{
std::lock_guard l(_mutex);
auto& pos_to_pull = _sender_pos_to_read[sender_idx];
const auto end = _multi_cast_blocks.end();
if (pos_to_pull != end) {
*block = *pos_to_pull->_block;

_cumulative_mem_size -= pos_to_pull->_mem_size;
pos_to_pull++;
_multi_cast_blocks.pop_front();
} else {
pos_to_pull->_block->create_same_struct_block(0)->swap(*block);
RETURN_IF_ERROR(vectorized::MutableBlock(block).merge(*pos_to_pull->_block));
pos_to_pull->_used_count--;
use_count = pos_to_pull->_used_count;
un_finish_copy = &pos_to_pull->_un_finish_copy;

pos_to_pull++;
}
*eos = _eos and pos_to_pull == end;
}

if (un_finish_copy) {
if (use_count == 0) {
// will clear _multi_cast_blocks
_wait_copy_block(block, *un_finish_copy);
} else {
_copy_block(block, *un_finish_copy);
}
}
*eos = _eos and pos_to_pull == _multi_cast_blocks.end();
return Status::OK();
}

Expand All @@ -60,12 +70,33 @@ void MultiCastDataStreamer::close_sender(int sender_idx) {
_multi_cast_blocks.pop_front();
} else {
pos_to_pull->_used_count--;
pos_to_pull->_un_finish_copy--;
pos_to_pull++;
}
}
_closed_sender_count++;
}

void MultiCastDataStreamer::_copy_block(vectorized::Block* block, int& un_finish_copy) {
const auto rows = block->rows();
for (int i = 0; i < block->columns(); ++i) {
block->get_by_position(i).column = block->get_by_position(i).column->clone_resized(rows);
}

std::unique_lock l(_mutex);
un_finish_copy--;
if (un_finish_copy == 0) {
l.unlock();
_cv.notify_one();
}
}

void MultiCastDataStreamer::_wait_copy_block(vectorized::Block* block, int& un_finish_copy) {
std::unique_lock l(_mutex);
_cv.wait(l, [&]() { return un_finish_copy == 0; });
_multi_cast_blocks.pop_front();
}

Status MultiCastDataStreamer::push(RuntimeState* state, doris::vectorized::Block* block, bool eos) {
auto rows = block->rows();
COUNTER_UPDATE(_process_rows, rows);
Expand All @@ -79,7 +110,8 @@ Status MultiCastDataStreamer::push(RuntimeState* state, doris::vectorized::Block
// TODO: if the [queue back block rows + block->rows()] < batch_size, better
// do merge block. but need check the need_process_count and used_count whether
// equal
_multi_cast_blocks.emplace_back(block, need_process_count, block_mem_size);
_multi_cast_blocks.emplace_back(block, need_process_count, need_process_count - 1,
block_mem_size);
_cumulative_mem_size += block_mem_size;
COUNTER_SET(_peak_mem_usage, std::max(_cumulative_mem_size, _peak_mem_usage->value()));

Expand Down
9 changes: 8 additions & 1 deletion be/src/pipeline/exec/multi_cast_data_streamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@
namespace doris::pipeline {

struct MultiCastBlock {
MultiCastBlock(vectorized::Block* block, int used_count, size_t mem_size);
MultiCastBlock(vectorized::Block* block, int used_count, int need_copy, size_t mem_size);

std::unique_ptr<vectorized::Block> _block;
int _used_count;
int _un_finish_copy;
size_t _mem_size;
};

Expand Down Expand Up @@ -68,6 +69,12 @@ class MultiCastDataStreamer {
}

private:
void _copy_block(vectorized::Block* block, int& un_finish_copy);

void _wait_copy_block(vectorized::Block* block, int& un_finish_copy);

std::condition_variable _cv;

const RowDescriptor& _row_desc;
RuntimeProfile* _profile;
std::list<MultiCastBlock> _multi_cast_blocks;
Expand Down

0 comments on commit 535ae2c

Please sign in to comment.