Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](memtracker) Refactor load channel mem tracker to improve accuracy #12791

Merged
merged 2 commits into from
Sep 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions be/src/olap/compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,13 @@ Compaction::Compaction(TabletSharedPtr tablet, const std::string& label)
#ifndef BE_TEST
_mem_tracker = std::make_shared<MemTrackerLimiter>(
-1, label, StorageEngine::instance()->compaction_mem_tracker());
_mem_tracker->enable_reset_zero();
#else
_mem_tracker = std::make_shared<MemTrackerLimiter>(-1, label);
#endif
}

Compaction::~Compaction() {
#ifndef BE_TEST
// Compaction tracker cannot be completely accurate, offset the global impact.
StorageEngine::instance()->compaction_mem_tracker()->cache_consume_local(
-_mem_tracker->consumption());
#endif
}
Compaction::~Compaction() {}

Status Compaction::compact() {
RETURN_NOT_OK(prepare_compact());
Expand Down
12 changes: 2 additions & 10 deletions be/src/olap/delta_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,6 @@ Status DeltaWriter::write(const RowBatch* row_batch, const std::vector<int>& row
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);

for (const auto& row_idx : row_idxs) {
_mem_table->insert(row_batch->get_row(row_idx)->get_tuple(0));
}
Expand All @@ -218,7 +216,6 @@ Status DeltaWriter::write(const vectorized::Block* block, const std::vector<int>
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
_mem_table->insert(block, row_idxs);

if (_mem_table->need_to_agg()) {
Expand All @@ -236,7 +233,7 @@ Status DeltaWriter::_flush_memtable_async() {
if (++_segment_counter > config::max_segment_num_per_rowset) {
return Status::OLAPInternalError(OLAP_ERR_TOO_MANY_SEGMENTS);
}
return _flush_token->submit(std::move(_mem_table), _mem_tracker);
return _flush_token->submit(std::move(_mem_table));
}

Status DeltaWriter::flush_memtable_and_wait(bool need_wait) {
Expand All @@ -253,7 +250,6 @@ Status DeltaWriter::flush_memtable_and_wait(bool need_wait) {
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
if (_flush_token->get_stats().flush_running_count == 0) {
// equal means there is no memtable in flush queue, just flush this memtable
VLOG_NOTICE << "flush memtable to reduce mem consumption. memtable size: "
Expand Down Expand Up @@ -290,7 +286,7 @@ void DeltaWriter::_reset_mem_table() {
}
_mem_table.reset(new MemTable(_tablet, _schema.get(), _tablet_schema.get(), _req.slots,
_req.tuple_desc, _rowset_writer.get(), _delete_bitmap,
_rowset_ids, _cur_max_version, _is_vec));
_rowset_ids, _cur_max_version, _mem_tracker, _is_vec));
}

Status DeltaWriter::close() {
Expand All @@ -308,7 +304,6 @@ Status DeltaWriter::close() {
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
RETURN_NOT_OK(_flush_memtable_async());
_mem_table.reset();
return Status::OK();
Expand All @@ -323,8 +318,6 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes& slave_tablet_nodes,
if (_is_cancelled) {
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
// return error if previous flush failed
RETURN_NOT_OK(_flush_token->wait());

Expand Down Expand Up @@ -384,7 +377,6 @@ Status DeltaWriter::cancel() {
if (!_is_init || _is_cancelled) {
return Status::OK();
}
SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
_mem_table.reset();
if (_flush_token != nullptr) {
// cancel and wait all memtables in flush queue to be finished
Expand Down
28 changes: 18 additions & 10 deletions be/src/olap/memtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,14 @@ MemTable::MemTable(TabletSharedPtr tablet, Schema* schema, const TabletSchema* t
const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* tuple_desc,
RowsetWriter* rowset_writer, DeleteBitmapPtr delete_bitmap,
const RowsetIdUnorderedSet& rowset_ids, int64_t cur_max_version,
bool support_vec)
const std::shared_ptr<MemTrackerLimiter>& tracker, bool support_vec)
: _tablet(std::move(tablet)),
_schema(schema),
_tablet_schema(tablet_schema),
_slot_descs(slot_descs),
_mem_tracker(std::make_unique<MemTracker>(
fmt::format("MemTable:tabletId={}", std::to_string(tablet_id())))),
_buffer_mem_pool(new MemPool(_mem_tracker.get())),
_table_mem_pool(new MemPool(_mem_tracker.get())),
_mem_tracker_hook(std::make_shared<MemTrackerLimiter>(
-1, fmt::format("MemTableHook:tabletId={}", std::to_string(tablet_id())),
tracker)),
_schema_size(_schema->schema_size()),
_rowset_writer(rowset_writer),
_is_first_insertion(true),
Expand All @@ -53,6 +52,12 @@ MemTable::MemTable(TabletSharedPtr tablet, Schema* schema, const TabletSchema* t
_delete_bitmap(delete_bitmap),
_rowset_ids(rowset_ids),
_cur_max_version(cur_max_version) {
_mem_tracker_hook->enable_reset_zero();
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
_mem_tracker_manual = std::make_unique<MemTracker>(
fmt::format("MemTableManual:tabletId={}", std::to_string(tablet_id())));
_buffer_mem_pool = std::make_unique<MemPool>(_mem_tracker_manual.get());
_table_mem_pool = std::make_unique<MemPool>(_mem_tracker_manual.get());
if (support_vec) {
_skip_list = nullptr;
_vec_row_comparator = std::make_shared<RowInBlockComparator>(_schema);
Expand Down Expand Up @@ -147,12 +152,12 @@ MemTable::~MemTable() {
}
}
std::for_each(_row_in_blocks.begin(), _row_in_blocks.end(), std::default_delete<RowInBlock>());
_mem_tracker->release(_mem_usage);
_mem_tracker_manual->release(_mem_usage);
_buffer_mem_pool->free_all();
_table_mem_pool->free_all();
DCHECK_EQ(_mem_tracker->consumption(), 0)
DCHECK_EQ(_mem_tracker_manual->consumption(), 0)
<< std::endl
<< MemTracker::log_usage(_mem_tracker->make_snapshot(0));
<< MemTracker::log_usage(_mem_tracker_manual->make_snapshot(0));
}

MemTable::RowCursorComparator::RowCursorComparator(const Schema* schema) : _schema(schema) {}
Expand All @@ -170,6 +175,7 @@ int MemTable::RowInBlockComparator::operator()(const RowInBlock* left,
}

void MemTable::insert(const vectorized::Block* input_block, const std::vector<int>& row_idxs) {
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
auto target_block = input_block->copy_block(_column_offset);
if (_is_first_insertion) {
_is_first_insertion = false;
Expand All @@ -186,7 +192,7 @@ void MemTable::insert(const vectorized::Block* input_block, const std::vector<in
_input_mutable_block.add_rows(&target_block, row_idxs.data(), row_idxs.data() + num_rows);
size_t input_size = target_block.allocated_bytes() * num_rows / target_block.rows();
_mem_usage += input_size;
_mem_tracker->consume(input_size);
_mem_tracker_manual->consume(input_size);

for (int i = 0; i < num_rows; i++) {
_row_in_blocks.emplace_back(new RowInBlock {cursor_in_mutableblock + i});
Expand Down Expand Up @@ -367,7 +373,7 @@ void MemTable::_collect_vskiplist_results() {
if constexpr (!is_final) {
// if is not final, we collect the agg results to input_block and then continue to insert
size_t shrunked_after_agg = _output_mutable_block.allocated_bytes();
_mem_tracker->consume(shrunked_after_agg - _mem_usage);
_mem_tracker_manual->consume(shrunked_after_agg - _mem_usage);
_mem_usage = shrunked_after_agg;
_input_mutable_block.swap(_output_mutable_block);
//TODO(weixang):opt here.
Expand All @@ -385,6 +391,7 @@ void MemTable::_collect_vskiplist_results() {
}

void MemTable::shrink_memtable_by_agg() {
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
if (keys_type() == KeysType::DUP_KEYS) {
return;
}
Expand Down Expand Up @@ -421,6 +428,7 @@ Status MemTable::_generate_delete_bitmap() {
}

Status MemTable::flush() {
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
VLOG_CRITICAL << "begin to flush memtable for tablet: " << tablet_id()
<< ", memsize: " << memory_usage() << ", rows: " << _rows;
int64_t duration_ns = 0;
Expand Down
13 changes: 9 additions & 4 deletions be/src/olap/memtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,18 @@ class MemTable {
const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* tuple_desc,
RowsetWriter* rowset_writer, DeleteBitmapPtr delete_bitmap,
const RowsetIdUnorderedSet& rowset_ids, int64_t cur_max_version,
bool support_vec = false);
const std::shared_ptr<MemTrackerLimiter>& tracker, bool support_vec = false);
~MemTable();

int64_t tablet_id() const { return _tablet->tablet_id(); }
KeysType keys_type() const { return _tablet->keys_type(); }
size_t memory_usage() const { return _mem_tracker->consumption(); }
std::shared_ptr<MemTrackerLimiter> mem_tracker_hook() const { return _mem_tracker_hook; }
size_t memory_usage() const { return _mem_tracker_manual->consumption(); }

inline void insert(const Tuple* tuple) { (this->*_insert_fn)(tuple); }
inline void insert(const Tuple* tuple) {
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
(this->*_insert_fn)(tuple);
}
// insert tuple from (row_pos) to (row_pos+num_rows)
void insert(const vectorized::Block* block, const std::vector<int>& row_idxs);

Expand Down Expand Up @@ -157,7 +161,8 @@ class MemTable {

std::shared_ptr<RowInBlockComparator> _vec_row_comparator;

std::unique_ptr<MemTracker> _mem_tracker;
std::unique_ptr<MemTracker> _mem_tracker_manual;
std::shared_ptr<MemTrackerLimiter> _mem_tracker_hook;
// This is a buffer, to hold the memory referenced by the rows that have not
// been inserted into the SkipList
std::unique_ptr<MemPool> _buffer_mem_pool;
Expand Down
13 changes: 4 additions & 9 deletions be/src/olap/memtable_flush_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,14 @@ namespace doris {
class MemtableFlushTask final : public Runnable {
public:
MemtableFlushTask(FlushToken* flush_token, std::unique_ptr<MemTable> memtable,
int64_t submit_task_time, const std::shared_ptr<MemTrackerLimiter>& tracker)
int64_t submit_task_time)
: _flush_token(flush_token),
_memtable(std::move(memtable)),
_submit_task_time(submit_task_time),
_tracker(tracker) {}
_submit_task_time(submit_task_time) {}

~MemtableFlushTask() override = default;

void run() override {
SCOPED_ATTACH_TASK(_tracker, ThreadContext::TaskType::LOAD);
_flush_token->_flush_memtable(_memtable.get(), _submit_task_time);
_memtable.reset();
}
Expand All @@ -47,7 +45,6 @@ class MemtableFlushTask final : public Runnable {
FlushToken* _flush_token;
std::unique_ptr<MemTable> _memtable;
int64_t _submit_task_time;
std::shared_ptr<MemTrackerLimiter> _tracker;
};

std::ostream& operator<<(std::ostream& os, const FlushStatistic& stat) {
Expand All @@ -60,15 +57,13 @@ std::ostream& operator<<(std::ostream& os, const FlushStatistic& stat) {
return os;
}

Status FlushToken::submit(std::unique_ptr<MemTable> mem_table,
const std::shared_ptr<MemTrackerLimiter>& tracker) {
Status FlushToken::submit(std::unique_ptr<MemTable> mem_table) {
ErrorCode s = _flush_status.load();
if (s != OLAP_SUCCESS) {
return Status::OLAPInternalError(s);
}
int64_t submit_task_time = MonotonicNanos();
auto task = std::make_shared<MemtableFlushTask>(this, std::move(mem_table), submit_task_time,
tracker);
auto task = std::make_shared<MemtableFlushTask>(this, std::move(mem_table), submit_task_time);
_stats.flush_running_count++;
return _flush_token->submit(std::move(task));
}
Expand Down
3 changes: 1 addition & 2 deletions be/src/olap/memtable_flush_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ class FlushToken {
explicit FlushToken(std::unique_ptr<ThreadPoolToken> flush_pool_token)
: _flush_token(std::move(flush_pool_token)), _flush_status(OLAP_SUCCESS) {}

Status submit(std::unique_ptr<MemTable> mem_table,
const std::shared_ptr<MemTrackerLimiter>& tracker);
Status submit(std::unique_ptr<MemTable> mem_table);

// error has happpens, so we cancel this token
// And remove all tasks in the queue.
Expand Down
3 changes: 1 addition & 2 deletions be/src/runtime/load_channel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,14 @@ LoadChannel::LoadChannel(const UniqueId& load_id, std::shared_ptr<MemTrackerLimi
// _load_channels in load_channel_mgr, or it may be erased
// immediately by gc thread.
_last_updated_time.store(time(nullptr));
_mem_tracker->enable_reset_zero();
}

LoadChannel::~LoadChannel() {
LOG(INFO) << "load channel removed. mem peak usage=" << _mem_tracker->peak_consumption()
<< ", info=" << _mem_tracker->debug_string() << ", load_id=" << _load_id
<< ", is high priority=" << _is_high_priority << ", sender_ip=" << _sender_ip
<< ", is_vec=" << _is_vec;
// Load channel tracker cannot be completely accurate, offsetting the impact on the load channel mgr tracker.
_mem_tracker->parent()->cache_consume_local(-_mem_tracker->consumption());
}

Status LoadChannel::open(const PTabletWriterOpenRequest& params) {
Expand Down
5 changes: 3 additions & 2 deletions be/src/runtime/memory/mem_tracker_limiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,13 @@ MemTrackerLimiter::~MemTrackerLimiter() {
// the first layer: process;
// the second layer: a tracker that will not be destructed globally (query/load pool, load channel mgr, etc.);
// the third layer: a query/load/compaction task generates a tracker (query tracker, load channel tracker, etc.).
if (_parent->parent()->label() == "Process") {
if ((_parent && _parent->label() == "Process") ||
(_parent->parent() && _parent->parent()->label() == "Process")) {
ExecEnv::GetInstance()->orphan_mem_tracker_raw()->cache_consume_local(
_consumption->current_value());
}
#endif

if (_reset_zero) cache_consume_local(-_consumption->current_value());
if (_parent) {
std::lock_guard<std::mutex> l(_parent->_child_tracker_limiter_lock);
if (_child_tracker_it != _parent->_child_tracker_limiters.end()) {
Expand Down
6 changes: 6 additions & 0 deletions be/src/runtime/memory/mem_tracker_limiter.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class MemTrackerLimiter final : public MemTracker {
}

void enable_print_log_usage() { _print_log_usage = true; }
void enable_reset_zero() { _reset_zero = true; }

// Logs the usage of this tracker limiter and optionally its children (recursively).
// If 'logged_consumption' is non-nullptr, sets the consumption value logged.
Expand Down Expand Up @@ -250,6 +251,11 @@ class MemTrackerLimiter final : public MemTracker {
std::atomic_size_t _had_child_count = 0;

bool _print_log_usage = false;
// mem hook record tracker cannot guarantee that the final consumption is 0,
// nor can it guarantee that the memory alloc and free are recorded in a one-to-one correspondence.
// In some cases, in order to avoid the cumulative error of the upper global tracker,
// the consumption of the current tracker is reset to zero.
bool _reset_zero = false;
};

inline void MemTrackerLimiter::consume(int64_t bytes) {
Expand Down
12 changes: 5 additions & 7 deletions be/src/runtime/memory/mem_tracker_task_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,12 @@ void MemTrackerTaskPool::logout_task_mem_tracker() {
// between the two trackers.
// At present, it is impossible to effectively locate which memory consume and release on different trackers,
// so query memory leaks cannot be found.
//
// In order to ensure that the query pool mem tracker is the sum of all currently running query mem trackers,
// the effect of the ended query mem tracker on the query pool mem tracker should be cleared, that is,
// the negative number of the current value of consume.
it->second->parent()->cache_consume_local(-it->second->consumption());
LOG(INFO) << fmt::format(
"Deregister query/load memory tracker, queryId={}, Limit={}, PeakUsed={}",
it->first, it->second->limit(), it->second->peak_consumption());
"Deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, "
"PeakUsed={}",
it->first, PrettyPrinter::print(it->second->limit(), TUnit::BYTES),
PrettyPrinter::print(it->second->consumption(), TUnit::BYTES),
PrettyPrinter::print(it->second->peak_consumption(), TUnit::BYTES));
expired_task_ids.emplace_back(it->first);
}
}
Expand Down
1 change: 1 addition & 0 deletions be/src/runtime/runtime_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
DCHECK(false);
_query_mem_tracker = ExecEnv::GetInstance()->query_pool_mem_tracker();
}
_query_mem_tracker->enable_reset_zero();

_instance_mem_tracker = std::make_shared<MemTrackerLimiter>(
-1, "RuntimeState:instance:" + print_id(_fragment_instance_id), _query_mem_tracker,
Expand Down
2 changes: 2 additions & 0 deletions be/src/runtime/thread_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,11 @@ class ThreadContext {
void attach_task(const TaskType& type, const std::string& task_id,
const TUniqueId& fragment_instance_id,
const std::shared_ptr<MemTrackerLimiter>& mem_tracker) {
#ifndef BE_TEST
DCHECK((_type == TaskType::UNKNOWN || _type == TaskType::BRPC) && _task_id == "")
<< ",new tracker label: " << mem_tracker->label() << ",old tracker label: "
<< _thread_mem_tracker_mgr->limiter_mem_tracker_raw()->label();
#endif
DCHECK(type != TaskType::UNKNOWN);
_type = type;
_task_id = task_id;
Expand Down
2 changes: 2 additions & 0 deletions be/src/util/mem_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,14 @@ int64_t MemInfo::_s_mem_limit = -1;
std::string MemInfo::_s_mem_limit_str = "";
int64_t MemInfo::_s_hard_mem_limit = -1;
size_t MemInfo::_s_allocator_physical_mem = 0;
size_t MemInfo::_s_pageheap_unmapped_bytes = 0;
size_t MemInfo::_s_tcmalloc_pageheap_free_bytes = 0;
size_t MemInfo::_s_tcmalloc_central_bytes = 0;
size_t MemInfo::_s_tcmalloc_transfer_bytes = 0;
size_t MemInfo::_s_tcmalloc_thread_bytes = 0;
size_t MemInfo::_s_allocator_cache_mem = 0;
std::string MemInfo::_s_allocator_cache_mem_str = "";
size_t MemInfo::_s_virtual_memory_used = 0;

void MemInfo::init() {
// Read from /proc/meminfo
Expand Down
Loading