Skip to content

Commit

Permalink
[opt](inverted index) Optimization of the initialization process in topn (#37429)
Browse files Browse the repository at this point in the history

1. Reduce the performance cost of initialization.
  • Loading branch information
zzzxl1993 committed Jul 21, 2024
1 parent 771c0de commit 87f7ecb
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 11 deletions.
14 changes: 4 additions & 10 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1864,7 +1864,8 @@ Status SegmentIterator::_read_columns(const std::vector<ColumnId>& column_ids,
}

Status SegmentIterator::_init_current_block(
vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>& current_columns) {
vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>& current_columns,
uint32_t nrows_read_limit) {
block->clear_column_data(_schema->num_column_ids());

for (size_t i = 0; i < _schema->num_column_ids(); i++) {
Expand All @@ -1884,7 +1885,7 @@ Status SegmentIterator::_init_current_block(
column_desc->path() == nullptr ? "" : column_desc->path()->get_path());
// TODO reuse
current_columns[cid] = file_column_type->create_column();
current_columns[cid]->reserve(_opts.block_row_max);
current_columns[cid]->reserve(nrows_read_limit);
} else {
// the column in block must clear() here to insert new data
if (_is_pred_column[cid] ||
Expand All @@ -1903,7 +1904,7 @@ Status SegmentIterator::_init_current_block(
} else if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_DATETIME) {
current_columns[cid]->set_datetime_type();
}
current_columns[cid]->reserve(_opts.block_row_max);
current_columns[cid]->reserve(nrows_read_limit);
}
}
}
Expand Down Expand Up @@ -2342,13 +2343,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
}

DBUG_EXECUTE_IF("segment_iterator.topn_opt_1", {
if (nrows_read_limit != 1) {
return Status::Error<ErrorCode::INTERNAL_ERROR>("topn opt 1 execute failed: {}",
nrows_read_limit);
}
})

RETURN_IF_ERROR(_init_current_block(block, _current_return_columns, nrows_read_limit));
_converted_column_ids.assign(_schema->columns().size(), 0);

Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ class SegmentIterator : public RowwiseIterator {
bool set_block_rowid);
void _replace_version_col(size_t num_rows);
Status _init_current_block(vectorized::Block* block,
std::vector<vectorized::MutableColumnPtr>& non_pred_vector);
std::vector<vectorized::MutableColumnPtr>& non_pred_vector,
uint32_t nrows_read_limit);
uint16_t _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t selected_size);
uint16_t _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, uint16_t selected_size);
void _output_non_pred_columns(vectorized::Block* block);
Expand Down

0 comments on commit 87f7ecb

Please sign in to comment.