Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a new leader failing to be elected under abnormal circumstances while the cluster configuration is being changed through the joint consensus algorithm. #432

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions src/braft/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ DECLARE_bool(raft_enable_leader_lease);
DEFINE_bool(raft_enable_witness_to_leader, false,
"enable witness temporarily to become leader when leader down accidently");

// When true, pre_vote() and elect_self() skip their
// "_conf.contains(_server_id)" guard, so a node that is absent from its
// current configuration is no longer stopped by that guard from starting
// an election. Defaults to false, which keeps the guard in place.
DEFINE_bool(raft_enable_peer_not_in_conf_can_elec, false,
"enable peer not in the conf can initiate elections");

#ifndef UNIT_TEST
static bvar::Adder<int64_t> g_num_nodes("raft_node_count");
#else
Expand Down Expand Up @@ -1622,10 +1625,12 @@ void NodeImpl::pre_vote(std::unique_lock<raft_mutex_t>* lck, bool triggered) {
" configuration is possibly out of date";
return;
}
if (!_conf.contains(_server_id)) {
if (!FLAGS_raft_enable_peer_not_in_conf_can_elec) {
if (!_conf.contains(_server_id)) {
LOG(WARNING) << "node " << _group_id << ':' << _server_id
<< " can't do pre_vote as it is not in " << _conf.conf;
return;
}
}

int64_t old_term = _current_term;
Expand Down Expand Up @@ -1681,10 +1686,12 @@ void NodeImpl::elect_self(std::unique_lock<raft_mutex_t>* lck,
bool old_leader_stepped_down) {
LOG(INFO) << "node " << _group_id << ":" << _server_id
<< " term " << _current_term << " start vote and grant vote self";
if (!_conf.contains(_server_id)) {
if (!FLAGS_raft_enable_peer_not_in_conf_can_elec) {
if (!_conf.contains(_server_id)) {
LOG(WARNING) << "node " << _group_id << ':' << _server_id
<< " can't do elect_self as it is not in " << _conf.conf;
return;
}
}
// cancel follower election timer
if (_state == STATE_FOLLOWER) {
Expand Down Expand Up @@ -2393,6 +2400,16 @@ void NodeImpl::handle_append_entries_request(brpc::Controller* cntl,
brpc::ClosureGuard done_guard(done);
std::unique_lock<raft_mutex_t> lck(_mutex);

// for test
const int64_t reject_log_index = get_reject_log_index();
if (reject_log_index > 0 &&
request->prev_log_index() + 1 >= reject_log_index) {
// _last_leader_timestamp = butil::monotonic_time_ms();
// don't interfere check_dead_nodes
cntl->SetFailed(EBUSY, "handle_append_entries_request reject_log_index");
return;
}

// pre set term, to avoid get term in lock
response->set_term(_current_term);

Expand Down Expand Up @@ -2894,6 +2911,10 @@ void NodeImpl::get_status(NodeStatus* status) {
}
}

// Fills |log_manager_status| by forwarding to _log_manager->get_status().
// NOTE(review): _mutex is not taken here and neither |log_manager_status|
// nor _log_manager is checked for NULL -- confirm that is safe for callers.
void NodeImpl::get_log_mgr_status(LogManagerStatus* log_manager_status) {
_log_manager->get_status(log_manager_status);
}

void NodeImpl::stop_replicator(const std::set<PeerId>& keep,
const std::set<PeerId>& drop) {
for (std::set<PeerId>::const_iterator
Expand Down
9 changes: 9 additions & 0 deletions src/braft/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ friend class VoteBallotCtx;
// see from the website, which is generated by |describe| actually.
void get_status(NodeStatus* status);

// Get the status of this node's log manager; the result is written to
// |log_manager_status|.
void get_log_mgr_status(LogManagerStatus* log_manager_status);

// Readonly mode func
void enter_readonly_mode();
void leave_readonly_mode();
Expand Down Expand Up @@ -241,6 +243,10 @@ friend class VoteBallotCtx;

bool disable_cli() const { return _options.disable_cli; }
bool is_witness() const { return _options.witness; }

// for test
// Sets the threshold read by handle_append_entries_request(): while the
// value is > 0, any AppendEntries request with
// prev_log_index + 1 >= |log_index| is failed with EBUSY. A value <= 0
// turns the hook off.
// NOTE(review): this write is not guarded by _mutex, while the handler
// reads the value after locking _mutex -- confirm unsynchronized access is
// acceptable for the tests that use it.
void set_reject_log_index(const int64_t log_index) { reject_log_index_ = log_index; }
// Returns the current reject threshold (0 unless set_reject_log_index()
// has been called with another value).
int64_t get_reject_log_index() const { return reject_log_index_; }
private:
friend class butil::RefCountedThreadSafe<NodeImpl>;

Expand Down Expand Up @@ -533,6 +539,9 @@ friend class butil::RefCountedThreadSafe<NodeImpl>;

LeaderLease _leader_lease;
FollowerLease _follower_lease;

// for test
// Threshold consulted by handle_append_entries_request(); the default 0
// leaves the rejection hook disabled.
// NOTE(review): the trailing-underscore name differs from the
// leading-underscore members declared just above (_leader_lease,
// _follower_lease).
int64_t reject_log_index_ {0};
};

}
Expand Down
12 changes: 12 additions & 0 deletions src/braft/raft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ void Node::get_status(NodeStatus* status) {
return _impl->get_status(status);
}

// Forwards to NodeImpl::get_log_mgr_status(), which fills
// |log_manager_status|.
void Node::get_log_mgr_status(LogManagerStatus* log_manager_status) {
return _impl->get_log_mgr_status(log_manager_status);
}

// Forwards to NodeImpl::enter_readonly_mode() on the underlying _impl.
void Node::enter_readonly_mode() {
return _impl->enter_readonly_mode();
}
Expand All @@ -242,6 +246,14 @@ bool Node::readonly() {
return _impl->readonly();
}

// Test hook: forwards |log_index| to NodeImpl::set_reject_log_index().
void Node::set_reject_log_index(const int64_t log_index) {
_impl->set_reject_log_index(log_index);
}

// Test hook: returns the value reported by
// NodeImpl::get_reject_log_index().
int64_t Node::get_reject_log_index() const {
return _impl->get_reject_log_index();
}

// ------------- Iterator
void Iterator::next() {
if (valid()) {
Expand Down
7 changes: 7 additions & 0 deletions src/braft/raft.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class LeaderChangeContext;
class FileSystemAdaptor;
class SnapshotThrottle;
class LogStorage;
class LogManagerStatus;

const PeerId ANY_PEER(butil::EndPoint(butil::IP_ANY, 0), 0);

Expand Down Expand Up @@ -762,6 +763,8 @@ class Node {
// see from the website.
void get_status(NodeStatus* status);

// Get the status of this node's log manager; the result is written to
// |log_manager_status|.
void get_log_mgr_status(LogManagerStatus* log_manager_status);

// Make this node enter readonly mode.
// Readonly mode should only be used to protect the system in some extreme cases.
// For example, in a storage system, too many write requests flood into the system
Expand Down Expand Up @@ -789,6 +792,10 @@ class Node {
// is less than the majority.
bool readonly();

// For test only. While the stored value is > 0, this node fails incoming
// AppendEntries requests whose prev_log_index + 1 >= |log_index| with
// EBUSY. Pass a value <= 0 to disable the rejection.
void set_reject_log_index(const int64_t log_index);

// For test only. Returns the threshold last passed to
// set_reject_log_index(), or 0 if it was never set.
int64_t get_reject_log_index() const;

private:
NodeImpl* _impl;
};
Expand Down
Loading