Skip to content

Commit

Permalink
[fix](memtracker) Refactor load channel mem tracker to improve accuracy (apache#12791)
Browse files Browse the repository at this point in the history

The mem-hook-based tracker cannot guarantee that its final consumption is 0, nor that memory allocations and frees are recorded in one-to-one correspondence.

During the life cycle of a memtable, from insert to flush, the hook may record more memory freed than allocated, causing the tracker's consumption to drop below 0.

To avoid accumulating this error in the parent load channel tracker, the memtable tracker's consumption is reset to zero in its destructor.
  • Loading branch information
xinyiZzz authored and Yijia Su committed Oct 8, 2022
1 parent 72e2a00 commit b51c656
Show file tree
Hide file tree
Showing 14 changed files with 62 additions and 53 deletions.
9 changes: 2 additions & 7 deletions be/src/olap/compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,13 @@ Compaction::Compaction(TabletSharedPtr tablet, const std::string& label)
#ifndef BE_TEST
_mem_tracker = std::make_shared<MemTrackerLimiter>(
-1, label, StorageEngine::instance()->compaction_mem_tracker());
_mem_tracker->enable_reset_zero();
#else
_mem_tracker = std::make_shared<MemTrackerLimiter>(-1, label);
#endif
}

Compaction::~Compaction() {
#ifndef BE_TEST
// Compaction tracker cannot be completely accurate, offset the global impact.
StorageEngine::instance()->compaction_mem_tracker()->cache_consume_local(
-_mem_tracker->consumption());
#endif
}
Compaction::~Compaction() {}

Status Compaction::compact() {
RETURN_NOT_OK(prepare_compact());
Expand Down
12 changes: 2 additions & 10 deletions be/src/olap/delta_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,6 @@ Status DeltaWriter::write(const RowBatch* row_batch, const std::vector<int>& row
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);

for (const auto& row_idx : row_idxs) {
_mem_table->insert(row_batch->get_row(row_idx)->get_tuple(0));
}
Expand All @@ -218,7 +216,6 @@ Status DeltaWriter::write(const vectorized::Block* block, const std::vector<int>
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
_mem_table->insert(block, row_idxs);

if (_mem_table->need_to_agg()) {
Expand All @@ -236,7 +233,7 @@ Status DeltaWriter::_flush_memtable_async() {
if (++_segment_counter > config::max_segment_num_per_rowset) {
return Status::OLAPInternalError(OLAP_ERR_TOO_MANY_SEGMENTS);
}
return _flush_token->submit(std::move(_mem_table), _mem_tracker);
return _flush_token->submit(std::move(_mem_table));
}

Status DeltaWriter::flush_memtable_and_wait(bool need_wait) {
Expand All @@ -253,7 +250,6 @@ Status DeltaWriter::flush_memtable_and_wait(bool need_wait) {
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
if (_flush_token->get_stats().flush_running_count == 0) {
// equal means there is no memtable in flush queue, just flush this memtable
VLOG_NOTICE << "flush memtable to reduce mem consumption. memtable size: "
Expand Down Expand Up @@ -290,7 +286,7 @@ void DeltaWriter::_reset_mem_table() {
}
_mem_table.reset(new MemTable(_tablet, _schema.get(), _tablet_schema.get(), _req.slots,
_req.tuple_desc, _rowset_writer.get(), _delete_bitmap,
_rowset_ids, _cur_max_version, _is_vec));
_rowset_ids, _cur_max_version, _mem_tracker, _is_vec));
}

Status DeltaWriter::close() {
Expand All @@ -308,7 +304,6 @@ Status DeltaWriter::close() {
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
RETURN_NOT_OK(_flush_memtable_async());
_mem_table.reset();
return Status::OK();
Expand All @@ -323,8 +318,6 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes& slave_tablet_nodes,
if (_is_cancelled) {
return Status::OLAPInternalError(OLAP_ERR_ALREADY_CANCELLED);
}

SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
// return error if previous flush failed
RETURN_NOT_OK(_flush_token->wait());

Expand Down Expand Up @@ -384,7 +377,6 @@ Status DeltaWriter::cancel() {
if (!_is_init || _is_cancelled) {
return Status::OK();
}
SCOPED_ATTACH_TASK(_mem_tracker, ThreadContext::TaskType::LOAD);
_mem_table.reset();
if (_flush_token != nullptr) {
// cancel and wait all memtables in flush queue to be finished
Expand Down
28 changes: 18 additions & 10 deletions be/src/olap/memtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,14 @@ MemTable::MemTable(TabletSharedPtr tablet, Schema* schema, const TabletSchema* t
const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* tuple_desc,
RowsetWriter* rowset_writer, DeleteBitmapPtr delete_bitmap,
const RowsetIdUnorderedSet& rowset_ids, int64_t cur_max_version,
bool support_vec)
const std::shared_ptr<MemTrackerLimiter>& tracker, bool support_vec)
: _tablet(std::move(tablet)),
_schema(schema),
_tablet_schema(tablet_schema),
_slot_descs(slot_descs),
_mem_tracker(std::make_unique<MemTracker>(
fmt::format("MemTable:tabletId={}", std::to_string(tablet_id())))),
_buffer_mem_pool(new MemPool(_mem_tracker.get())),
_table_mem_pool(new MemPool(_mem_tracker.get())),
_mem_tracker_hook(std::make_shared<MemTrackerLimiter>(
-1, fmt::format("MemTableHook:tabletId={}", std::to_string(tablet_id())),
tracker)),
_schema_size(_schema->schema_size()),
_rowset_writer(rowset_writer),
_is_first_insertion(true),
Expand All @@ -53,6 +52,12 @@ MemTable::MemTable(TabletSharedPtr tablet, Schema* schema, const TabletSchema* t
_delete_bitmap(delete_bitmap),
_rowset_ids(rowset_ids),
_cur_max_version(cur_max_version) {
_mem_tracker_hook->enable_reset_zero();
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
_mem_tracker_manual = std::make_unique<MemTracker>(
fmt::format("MemTableManual:tabletId={}", std::to_string(tablet_id())));
_buffer_mem_pool = std::make_unique<MemPool>(_mem_tracker_manual.get());
_table_mem_pool = std::make_unique<MemPool>(_mem_tracker_manual.get());
if (support_vec) {
_skip_list = nullptr;
_vec_row_comparator = std::make_shared<RowInBlockComparator>(_schema);
Expand Down Expand Up @@ -147,12 +152,12 @@ MemTable::~MemTable() {
}
}
std::for_each(_row_in_blocks.begin(), _row_in_blocks.end(), std::default_delete<RowInBlock>());
_mem_tracker->release(_mem_usage);
_mem_tracker_manual->release(_mem_usage);
_buffer_mem_pool->free_all();
_table_mem_pool->free_all();
DCHECK_EQ(_mem_tracker->consumption(), 0)
DCHECK_EQ(_mem_tracker_manual->consumption(), 0)
<< std::endl
<< MemTracker::log_usage(_mem_tracker->make_snapshot(0));
<< MemTracker::log_usage(_mem_tracker_manual->make_snapshot(0));
}

MemTable::RowCursorComparator::RowCursorComparator(const Schema* schema) : _schema(schema) {}
Expand All @@ -170,6 +175,7 @@ int MemTable::RowInBlockComparator::operator()(const RowInBlock* left,
}

void MemTable::insert(const vectorized::Block* input_block, const std::vector<int>& row_idxs) {
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
auto target_block = input_block->copy_block(_column_offset);
if (_is_first_insertion) {
_is_first_insertion = false;
Expand All @@ -186,7 +192,7 @@ void MemTable::insert(const vectorized::Block* input_block, const std::vector<in
_input_mutable_block.add_rows(&target_block, row_idxs.data(), row_idxs.data() + num_rows);
size_t input_size = target_block.allocated_bytes() * num_rows / target_block.rows();
_mem_usage += input_size;
_mem_tracker->consume(input_size);
_mem_tracker_manual->consume(input_size);

for (int i = 0; i < num_rows; i++) {
_row_in_blocks.emplace_back(new RowInBlock {cursor_in_mutableblock + i});
Expand Down Expand Up @@ -367,7 +373,7 @@ void MemTable::_collect_vskiplist_results() {
if constexpr (!is_final) {
// if is not final, we collect the agg results to input_block and then continue to insert
size_t shrunked_after_agg = _output_mutable_block.allocated_bytes();
_mem_tracker->consume(shrunked_after_agg - _mem_usage);
_mem_tracker_manual->consume(shrunked_after_agg - _mem_usage);
_mem_usage = shrunked_after_agg;
_input_mutable_block.swap(_output_mutable_block);
//TODO(weixang):opt here.
Expand All @@ -385,6 +391,7 @@ void MemTable::_collect_vskiplist_results() {
}

void MemTable::shrink_memtable_by_agg() {
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
if (keys_type() == KeysType::DUP_KEYS) {
return;
}
Expand Down Expand Up @@ -421,6 +428,7 @@ Status MemTable::_generate_delete_bitmap() {
}

Status MemTable::flush() {
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
VLOG_CRITICAL << "begin to flush memtable for tablet: " << tablet_id()
<< ", memsize: " << memory_usage() << ", rows: " << _rows;
int64_t duration_ns = 0;
Expand Down
13 changes: 9 additions & 4 deletions be/src/olap/memtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,18 @@ class MemTable {
const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* tuple_desc,
RowsetWriter* rowset_writer, DeleteBitmapPtr delete_bitmap,
const RowsetIdUnorderedSet& rowset_ids, int64_t cur_max_version,
bool support_vec = false);
const std::shared_ptr<MemTrackerLimiter>& tracker, bool support_vec = false);
~MemTable();

int64_t tablet_id() const { return _tablet->tablet_id(); }
KeysType keys_type() const { return _tablet->keys_type(); }
size_t memory_usage() const { return _mem_tracker->consumption(); }
std::shared_ptr<MemTrackerLimiter> mem_tracker_hook() const { return _mem_tracker_hook; }
size_t memory_usage() const { return _mem_tracker_manual->consumption(); }

inline void insert(const Tuple* tuple) { (this->*_insert_fn)(tuple); }
inline void insert(const Tuple* tuple) {
SCOPED_ATTACH_TASK(_mem_tracker_hook, ThreadContext::TaskType::LOAD);
(this->*_insert_fn)(tuple);
}
// insert tuple from (row_pos) to (row_pos+num_rows)
void insert(const vectorized::Block* block, const std::vector<int>& row_idxs);

Expand Down Expand Up @@ -157,7 +161,8 @@ class MemTable {

std::shared_ptr<RowInBlockComparator> _vec_row_comparator;

std::unique_ptr<MemTracker> _mem_tracker;
std::unique_ptr<MemTracker> _mem_tracker_manual;
std::shared_ptr<MemTrackerLimiter> _mem_tracker_hook;
// This is a buffer, to hold the memory referenced by the rows that have not
// been inserted into the SkipList
std::unique_ptr<MemPool> _buffer_mem_pool;
Expand Down
13 changes: 4 additions & 9 deletions be/src/olap/memtable_flush_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,14 @@ namespace doris {
class MemtableFlushTask final : public Runnable {
public:
MemtableFlushTask(FlushToken* flush_token, std::unique_ptr<MemTable> memtable,
int64_t submit_task_time, const std::shared_ptr<MemTrackerLimiter>& tracker)
int64_t submit_task_time)
: _flush_token(flush_token),
_memtable(std::move(memtable)),
_submit_task_time(submit_task_time),
_tracker(tracker) {}
_submit_task_time(submit_task_time) {}

~MemtableFlushTask() override = default;

void run() override {
SCOPED_ATTACH_TASK(_tracker, ThreadContext::TaskType::LOAD);
_flush_token->_flush_memtable(_memtable.get(), _submit_task_time);
_memtable.reset();
}
Expand All @@ -47,7 +45,6 @@ class MemtableFlushTask final : public Runnable {
FlushToken* _flush_token;
std::unique_ptr<MemTable> _memtable;
int64_t _submit_task_time;
std::shared_ptr<MemTrackerLimiter> _tracker;
};

std::ostream& operator<<(std::ostream& os, const FlushStatistic& stat) {
Expand All @@ -60,15 +57,13 @@ std::ostream& operator<<(std::ostream& os, const FlushStatistic& stat) {
return os;
}

Status FlushToken::submit(std::unique_ptr<MemTable> mem_table,
const std::shared_ptr<MemTrackerLimiter>& tracker) {
Status FlushToken::submit(std::unique_ptr<MemTable> mem_table) {
ErrorCode s = _flush_status.load();
if (s != OLAP_SUCCESS) {
return Status::OLAPInternalError(s);
}
int64_t submit_task_time = MonotonicNanos();
auto task = std::make_shared<MemtableFlushTask>(this, std::move(mem_table), submit_task_time,
tracker);
auto task = std::make_shared<MemtableFlushTask>(this, std::move(mem_table), submit_task_time);
_stats.flush_running_count++;
return _flush_token->submit(std::move(task));
}
Expand Down
3 changes: 1 addition & 2 deletions be/src/olap/memtable_flush_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ class FlushToken {
explicit FlushToken(std::unique_ptr<ThreadPoolToken> flush_pool_token)
: _flush_token(std::move(flush_pool_token)), _flush_status(OLAP_SUCCESS) {}

Status submit(std::unique_ptr<MemTable> mem_table,
const std::shared_ptr<MemTrackerLimiter>& tracker);
Status submit(std::unique_ptr<MemTable> mem_table);

// error has happpens, so we cancel this token
// And remove all tasks in the queue.
Expand Down
3 changes: 1 addition & 2 deletions be/src/runtime/load_channel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,14 @@ LoadChannel::LoadChannel(const UniqueId& load_id, std::shared_ptr<MemTrackerLimi
// _load_channels in load_channel_mgr, or it may be erased
// immediately by gc thread.
_last_updated_time.store(time(nullptr));
_mem_tracker->enable_reset_zero();
}

LoadChannel::~LoadChannel() {
LOG(INFO) << "load channel removed. mem peak usage=" << _mem_tracker->peak_consumption()
<< ", info=" << _mem_tracker->debug_string() << ", load_id=" << _load_id
<< ", is high priority=" << _is_high_priority << ", sender_ip=" << _sender_ip
<< ", is_vec=" << _is_vec;
// Load channel tracker cannot be completely accurate, offsetting the impact on the load channel mgr tracker.
_mem_tracker->parent()->cache_consume_local(-_mem_tracker->consumption());
}

Status LoadChannel::open(const PTabletWriterOpenRequest& params) {
Expand Down
5 changes: 3 additions & 2 deletions be/src/runtime/memory/mem_tracker_limiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,13 @@ MemTrackerLimiter::~MemTrackerLimiter() {
// the first layer: process;
// the second layer: a tracker that will not be destructed globally (query/load pool, load channel mgr, etc.);
// the third layer: a query/load/compaction task generates a tracker (query tracker, load channel tracker, etc.).
if (_parent->parent()->label() == "Process") {
if ((_parent && _parent->label() == "Process") ||
(_parent->parent() && _parent->parent()->label() == "Process")) {
ExecEnv::GetInstance()->orphan_mem_tracker_raw()->cache_consume_local(
_consumption->current_value());
}
#endif

if (_reset_zero) cache_consume_local(-_consumption->current_value());
if (_parent) {
std::lock_guard<std::mutex> l(_parent->_child_tracker_limiter_lock);
if (_child_tracker_it != _parent->_child_tracker_limiters.end()) {
Expand Down
6 changes: 6 additions & 0 deletions be/src/runtime/memory/mem_tracker_limiter.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class MemTrackerLimiter final : public MemTracker {
}

void enable_print_log_usage() { _print_log_usage = true; }
void enable_reset_zero() { _reset_zero = true; }

// Logs the usage of this tracker limiter and optionally its children (recursively).
// If 'logged_consumption' is non-nullptr, sets the consumption value logged.
Expand Down Expand Up @@ -250,6 +251,11 @@ class MemTrackerLimiter final : public MemTracker {
std::atomic_size_t _had_child_count = 0;

bool _print_log_usage = false;
// mem hook record tracker cannot guarantee that the final consumption is 0,
// nor can it guarantee that the memory alloc and free are recorded in a one-to-one correspondence.
// In some cases, in order to avoid the cumulative error of the upper global tracker,
// the consumption of the current tracker is reset to zero.
bool _reset_zero = false;
};

inline void MemTrackerLimiter::consume(int64_t bytes) {
Expand Down
12 changes: 5 additions & 7 deletions be/src/runtime/memory/mem_tracker_task_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,12 @@ void MemTrackerTaskPool::logout_task_mem_tracker() {
// between the two trackers.
// At present, it is impossible to effectively locate which memory consume and release on different trackers,
// so query memory leaks cannot be found.
//
// In order to ensure that the query pool mem tracker is the sum of all currently running query mem trackers,
// the effect of the ended query mem tracker on the query pool mem tracker should be cleared, that is,
// the negative number of the current value of consume.
it->second->parent()->cache_consume_local(-it->second->consumption());
LOG(INFO) << fmt::format(
"Deregister query/load memory tracker, queryId={}, Limit={}, PeakUsed={}",
it->first, it->second->limit(), it->second->peak_consumption());
"Deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, "
"PeakUsed={}",
it->first, PrettyPrinter::print(it->second->limit(), TUnit::BYTES),
PrettyPrinter::print(it->second->consumption(), TUnit::BYTES),
PrettyPrinter::print(it->second->peak_consumption(), TUnit::BYTES));
expired_task_ids.emplace_back(it->first);
}
}
Expand Down
1 change: 1 addition & 0 deletions be/src/runtime/runtime_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
DCHECK(false);
_query_mem_tracker = ExecEnv::GetInstance()->query_pool_mem_tracker();
}
_query_mem_tracker->enable_reset_zero();

_instance_mem_tracker = std::make_shared<MemTrackerLimiter>(
-1, "RuntimeState:instance:" + print_id(_fragment_instance_id), _query_mem_tracker,
Expand Down
2 changes: 2 additions & 0 deletions be/src/runtime/thread_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,11 @@ class ThreadContext {
void attach_task(const TaskType& type, const std::string& task_id,
const TUniqueId& fragment_instance_id,
const std::shared_ptr<MemTrackerLimiter>& mem_tracker) {
#ifndef BE_TEST
DCHECK((_type == TaskType::UNKNOWN || _type == TaskType::BRPC) && _task_id == "")
<< ",new tracker label: " << mem_tracker->label() << ",old tracker label: "
<< _thread_mem_tracker_mgr->limiter_mem_tracker_raw()->label();
#endif
DCHECK(type != TaskType::UNKNOWN);
_type = type;
_task_id = task_id;
Expand Down
2 changes: 2 additions & 0 deletions be/src/util/mem_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,14 @@ int64_t MemInfo::_s_mem_limit = -1;
std::string MemInfo::_s_mem_limit_str = "";
int64_t MemInfo::_s_hard_mem_limit = -1;
size_t MemInfo::_s_allocator_physical_mem = 0;
size_t MemInfo::_s_pageheap_unmapped_bytes = 0;
size_t MemInfo::_s_tcmalloc_pageheap_free_bytes = 0;
size_t MemInfo::_s_tcmalloc_central_bytes = 0;
size_t MemInfo::_s_tcmalloc_transfer_bytes = 0;
size_t MemInfo::_s_tcmalloc_thread_bytes = 0;
size_t MemInfo::_s_allocator_cache_mem = 0;
std::string MemInfo::_s_allocator_cache_mem_str = "";
size_t MemInfo::_s_virtual_memory_used = 0;

void MemInfo::init() {
// Read from /proc/meminfo
Expand Down
Loading

0 comments on commit b51c656

Please sign in to comment.