Skip to content

Commit

Permalink
[fix](file cache) Fix slow IO for table stats procedure, introduced by
Browse files Browse the repository at this point in the history
…apache#37141

The session variable `disable_file_cache` is interpreted as "use the disposable
file cache" in beta_rowset_reader.cpp:

```
if (_read_context->runtime_state != nullptr) {
    _read_options.io_ctx.query_id = &_read_context->runtime_state->query_id();
    _read_options.io_ctx.read_file_cache =
            _read_context->runtime_state->query_options().enable_file_cache;
    _read_options.io_ctx.is_disposable =
            _read_context->runtime_state->query_options().disable_file_cache;
}
```

We use the disposable cache to avoid IO amplification and to avoid evicting
large amounts of data from the regular cache (the "normal cache").

We cannot set the read option's cache policy to "no cache", because that may
cause IO amplification: every page IO would trigger a remote IO, which is a
performance disaster.
  • Loading branch information
gavinchou committed Aug 8, 2024
1 parent 6de038b commit bbe0bd1
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 33 deletions.
7 changes: 2 additions & 5 deletions be/src/olap/parallel_scanner_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,17 +182,14 @@ Status ParallelScannerBuilder::_load() {
bool enable_segment_cache = _state->query_options().__isset.enable_segment_cache
? _state->query_options().enable_segment_cache
: true;
bool disable_file_cache = _state->query_options().__isset.disable_file_cache
? _state->query_options().disable_file_cache
: false;
for (auto& rowset : rowsets) {
RETURN_IF_ERROR(rowset->load());
const auto rowset_id = rowset->rowset_id();
auto& segment_cache_handle = _segment_cache_handles[rowset_id];

RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
std::dynamic_pointer_cast<BetaRowset>(rowset), &segment_cache_handle,
enable_segment_cache, false, disable_file_cache));
enable_segment_cache, false));
_total_rows += rowset->num_rows();
}
}
Expand All @@ -211,4 +208,4 @@ std::shared_ptr<NewOlapScanner> ParallelScannerBuilder::_build_scanner(
return NewOlapScanner::create_shared(_parent, std::move(params));
}

} // namespace doris
} // namespace doris
18 changes: 7 additions & 11 deletions be/src/olap/rowset/beta_rowset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,26 +147,23 @@ Status BetaRowset::get_segments_size(std::vector<size_t>* segments_size) {
return Status::OK();
}

Status BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments,
bool disable_file_cache) {
return load_segments(0, num_segments(), segments, disable_file_cache);
Status BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments) {
return load_segments(0, num_segments(), segments);
}

Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end,
std::vector<segment_v2::SegmentSharedPtr>* segments,
bool disable_file_cache) {
std::vector<segment_v2::SegmentSharedPtr>* segments) {
int64_t seg_id = seg_id_begin;
while (seg_id < seg_id_end) {
std::shared_ptr<segment_v2::Segment> segment;
RETURN_IF_ERROR(load_segment(seg_id, &segment, disable_file_cache));
RETURN_IF_ERROR(load_segment(seg_id, &segment));
segments->push_back(std::move(segment));
seg_id++;
}
return Status::OK();
}

Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment,
bool disable_file_cache) {
Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment) {
auto fs = _rowset_meta->fs();
if (!fs) {
return Status::Error<INIT_FAILED>("get fs failed");
Expand All @@ -175,9 +172,8 @@ Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* se
DCHECK(seg_id >= 0);
auto seg_path = DORIS_TRY(segment_path(seg_id));
io::FileReaderOptions reader_options {
.cache_type = !disable_file_cache && config::enable_file_cache
? io::FileCachePolicy::FILE_BLOCK_CACHE
: io::FileCachePolicy::NO_CACHE,
.cache_type = config::enable_file_cache ? io::FileCachePolicy::FILE_BLOCK_CACHE
: io::FileCachePolicy::NO_CACHE,
.is_doris_table = true,
.cache_base_path = "",
.file_size = _rowset_meta->segment_file_size(seg_id),
Expand Down
9 changes: 3 additions & 6 deletions be/src/olap/rowset/beta_rowset.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,12 @@ class BetaRowset final : public Rowset {

Status check_file_exist() override;

Status load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments,
bool disable_file_cache = false);
Status load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments);

Status load_segments(int64_t seg_id_begin, int64_t seg_id_end,
std::vector<segment_v2::SegmentSharedPtr>* segments,
bool disable_file_cache = false);
std::vector<segment_v2::SegmentSharedPtr>* segments);

Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment,
bool disable_file_cache = false);
Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);

Status get_segments_size(std::vector<size_t>* segments_size);

Expand Down
10 changes: 3 additions & 7 deletions be/src/olap/rowset/beta_rowset_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
}

// load segments
bool disable_file_cache = false;
bool enable_segment_cache = true;
auto* state = read_context->runtime_state;
if (state != nullptr) {
disable_file_cache = state->query_options().__isset.disable_file_cache
? state->query_options().disable_file_cache
: false;
enable_segment_cache = state->query_options().__isset.enable_segment_cache
? state->query_options().enable_segment_cache
: true;
Expand All @@ -264,9 +260,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
bool should_use_cache = use_cache || (_read_context->reader_type == ReaderType::READER_QUERY &&
enable_segment_cache);
SegmentCacheHandle segment_cache_handle;
RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
_rowset, &segment_cache_handle, should_use_cache,
/*need_load_pk_index_and_bf*/ false, disable_file_cache));
RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &segment_cache_handle,
should_use_cache,
/*need_load_pk_index_and_bf*/ false));

// create iterator for each segment
auto& segments = segment_cache_handle.get_segments();
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/segment_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void SegmentCache::erase(const SegmentCache::CacheKey& key) {

Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset,
SegmentCacheHandle* cache_handle, bool use_cache,
bool need_load_pk_index_and_bf, bool disable_file_cache) {
bool need_load_pk_index_and_bf) {
if (cache_handle->is_inited()) {
return Status::OK();
}
Expand All @@ -62,7 +62,7 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset,
continue;
}
segment_v2::SegmentSharedPtr segment;
RETURN_IF_ERROR(rowset->load_segment(i, &segment, disable_file_cache));
RETURN_IF_ERROR(rowset->load_segment(i, &segment));
if (need_load_pk_index_and_bf) {
RETURN_IF_ERROR(segment->load_pk_index_and_bf());
}
Expand Down
3 changes: 1 addition & 2 deletions be/src/olap/segment_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ class SegmentLoader {
// Load segments of "rowset", return the "cache_handle" which contains segments.
// If use_cache is true, it will be loaded from _cache.
Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle,
bool use_cache = false, bool need_load_pk_index_and_bf = false,
bool disable_file_cache = false);
bool use_cache = false, bool need_load_pk_index_and_bf = false);

void erase_segment(const SegmentCache::CacheKey& key);

Expand Down

0 comments on commit bbe0bd1

Please sign in to comment.