Skip to content

Commit

Permalink
Fix race condition caused by strand wrap method (#104)
Browse files Browse the repository at this point in the history
Description of PR
Summary:
Fixes # (issue)
This PR is to fix race condition introduced by wrap method of boost::asio::io_service::strand:

In the following case:

async_op_1(..., s.wrap(a));
async_op_2(..., s.wrap(b));

the completion of the first async operation will perform s.dispatch(a), and the second will perform s.dispatch(b), but the order in which those are performed is unspecified. That is, you cannot state whether one happens-before the other. Therefore, none of the above conditions are met and no ordering guarantee is made.

sign-off: Jing Zhang zhangjing@microsoft.com

Type of change
 Bug fix
 New feature
 Doc/Design
 Unit test
Approach
What is the motivation for this PR?
To fix the bug that linkgmrd cleans mux metrics table after setting mux state to orchagent.

How did you do it?
Use boost::asio::post() to replace wrap.
Add unit tests to cover this race condition scenario:
Remove override of setMuxState, postMetricsEvent in FakeDbinterface so the real implementation can be executed.
Override setMuxState, postMetricsEvent in FakeMuxPort as LinkManagerStateMachine tests use fake mux ports, so existing tests won't be broken.
Initiate thread pool in MuxManager test to better mimic, and run 1000 times setMuxState & postMetricsEvent to reproduce.
How did you verify/test it?
Unit tests. Verified:

With wrap, issue was reproduced 14/1000 times.
[  FAILED  ] 1 test, listed below:
[  FAILED  ] MuxManagerTest.DbInterfaceRaceConditionCheck
test/MuxManagerTest.cpp:1014: Failure
Value of: mDbInterfacePtr->mDbInterfaceRaceConditionCheckFailure
  Actual: true
Expected: false
...
With boost::asio::post(), issue was not reproducible.
  • Loading branch information
zjswhhh authored Aug 9, 2022
1 parent 4cff43f commit 3f7a6f2
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 87 deletions.
50 changes: 20 additions & 30 deletions src/DbInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,11 @@ void DbInterface::getMuxState(const std::string &portName)
{
MUXLOGDEBUG(portName);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleGetMuxState,
this,
portName
)));
));
}

//
Expand All @@ -87,13 +86,12 @@ void DbInterface::setMuxState(const std::string &portName, mux_state::MuxState::
{
MUXLOGDEBUG(boost::format("%s: setting mux to %s") % portName % mMuxState[label]);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleSetMuxState,
this,
portName,
label
)));
));
}

//
Expand All @@ -105,13 +103,12 @@ void DbInterface::setPeerMuxState(const std::string &portName, mux_state::MuxSta
{
MUXLOGDEBUG(boost::format("%s: setting peer mux to %s") % portName % mMuxState[label]);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleSetPeerMuxState,
this,
portName,
label
)));
));
}


Expand All @@ -124,12 +121,11 @@ void DbInterface::probeMuxState(const std::string &portName)
{
MUXLOGDEBUG(portName);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleProbeMuxState,
this,
portName
)));
));
}

//
Expand All @@ -141,12 +137,11 @@ void DbInterface::probeForwardingState(const std::string &portName)
{
MUXLOGDEBUG(portName);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleProbeForwardingState,
this,
portName
)));
));
}

//
Expand All @@ -158,13 +153,12 @@ void DbInterface::setMuxLinkmgrState(const std::string &portName, link_manager::
{
MUXLOGDEBUG(boost::format("%s: setting mux linkmgr to %s") % portName % mMuxLinkmgrState[static_cast<int> (label)]);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleSetMuxLinkmgrState,
this,
portName,
label
)));
));
}

//
Expand All @@ -187,15 +181,14 @@ void DbInterface::postMetricsEvent(
mMuxMetrics[static_cast<int> (metrics)]
);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handlePostMuxMetrics,
this,
portName,
metrics,
label,
boost::posix_time::microsec_clock::universal_time()
)));
));
}

//
Expand All @@ -215,14 +208,13 @@ void DbInterface::postLinkProberMetricsEvent(
mLinkProbeMetrics[static_cast<int> (metrics)]
);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handlePostLinkProberMetrics,
this,
portName,
metrics,
boost::posix_time::microsec_clock::universal_time()
)));
));
}

//
Expand All @@ -244,14 +236,13 @@ void DbInterface::postPckLossRatio(
expectedPacketCount
);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand,boost::bind(
&DbInterface::handlePostPckLossRatio,
this,
portName,
unknownEventCount,
expectedPacketCount
)));
));
}

//
Expand Down Expand Up @@ -841,13 +832,12 @@ void DbInterface::setMuxMode(const std::string &portName, const std::string stat
{
MUXLOGDEBUG(portName);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand,boost::bind(
&DbInterface::handleSetMuxMode,
this,
portName,
state
)));
));
}

//
Expand Down
74 changes: 37 additions & 37 deletions src/DbInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,19 @@ class DbInterface
*
*@return none
*/
virtual void setMuxState(const std::string &portName, mux_state::MuxState::Label label);
void setMuxState(const std::string &portName, mux_state::MuxState::Label label);

/**
*@method handleSetMuxState
*
*@brief set MUX state in APP DB for orchagent processing
*
*@param portName (in) MUX/port name
*@param label (in) label of target state
*
*@return none
*/
virtual void handleSetMuxState(const std::string portName, mux_state::MuxState::Label label);

/**
*@method setPeerMuxState
Expand All @@ -150,7 +162,7 @@ class DbInterface
*
*@return none
*/
virtual void setPeerMuxState(const std::string &portName, mux_state::MuxState::Label label);
void setPeerMuxState(const std::string &portName, mux_state::MuxState::Label label);

/**
*@method probeMuxState
Expand All @@ -172,7 +184,7 @@ class DbInterface
*
* @return none
*/
virtual void probeForwardingState(const std::string &portName);
void probeForwardingState(const std::string &portName);

/**
*@method setMuxLinkmgrState
Expand All @@ -197,12 +209,31 @@ class DbInterface
*
*@return none
*/
virtual void postMetricsEvent(
void postMetricsEvent(
const std::string &portName,
link_manager::ActiveStandbyStateMachine::Metrics metrics,
mux_state::MuxState::Label label
);

/**
*@method handlePostMuxMetrics
*
*@brief set MUX metrics to state db
*
*@param portName (in) MUX/port name
*@param metrics (in) metrics data
*@param label (in) label of target state
*@param time (in) current time
*
*@return none
*/
virtual void handlePostMuxMetrics(
const std::string portName,
link_manager::ActiveStandbyStateMachine::Metrics metrics,
mux_state::MuxState::Label label,
boost::posix_time::ptime time
);

/**
* @method postLinkProberMetricsEvent
*
Expand Down Expand Up @@ -339,18 +370,6 @@ class DbInterface
*/
void handleGetMuxState(const std::string portName);

/**
*@method handleSetMuxState
*
*@brief set MUX state in APP DB for orchagent processing
*
*@param portName (in) MUX/port name
*@param label (in) label of target state
*
*@return none
*/
void handleSetMuxState(const std::string portName, mux_state::MuxState::Label label);

/**
*@method handleSetPeerMuxState
*
Expand All @@ -361,7 +380,7 @@ class DbInterface
*
*@return none
*/
void handleSetPeerMuxState(const std::string portName, mux_state::MuxState::Label label);
virtual void handleSetPeerMuxState(const std::string portName, mux_state::MuxState::Label label);

/**
*@method handleProbeMuxState
Expand All @@ -383,7 +402,7 @@ class DbInterface
*
* @return none
*/
void handleProbeForwardingState(const std::string portName);
virtual void handleProbeForwardingState(const std::string portName);

/**
*@method handleSetMuxLinkmgrState
Expand All @@ -397,25 +416,6 @@ class DbInterface
*/
void handleSetMuxLinkmgrState(const std::string portName, link_manager::ActiveStandbyStateMachine::Label label);

/**
*@method handlePostMuxMetrics
*
*@brief set MUX metrics to state db
*
*@param portName (in) MUX/port name
*@param metrics (in) metrics data
*@param label (in) label of target state
*@param time (in) current time
*
*@return none
*/
void handlePostMuxMetrics(
const std::string portName,
link_manager::ActiveStandbyStateMachine::Metrics metrics,
mux_state::MuxState::Label label,
boost::posix_time::ptime time
);

/**
* @method handlePostLinkProberMetrics
*
Expand Down
4 changes: 2 additions & 2 deletions src/MuxPort.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
*
*@return none
*/
inline void setMuxState(mux_state::MuxState::Label label) {mDbInterfacePtr->setMuxState(mMuxPortConfig.getPortName(), label);};
virtual inline void setMuxState(mux_state::MuxState::Label label) {mDbInterfacePtr->setMuxState(mMuxPortConfig.getPortName(), label);};

/**
*@method setPeerMuxState
Expand Down Expand Up @@ -170,7 +170,7 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
*
*@return none
*/
inline void postMetricsEvent(
virtual inline void postMetricsEvent(
link_manager::ActiveStandbyStateMachine::Metrics metrics,
mux_state::MuxState::Label label
) {
Expand Down
19 changes: 12 additions & 7 deletions test/FakeDbInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,15 @@ FakeDbInterface::FakeDbInterface(mux::MuxManager *muxManager, boost::asio::io_se
{
}

void FakeDbInterface::setMuxState(const std::string &portName, mux_state::MuxState::Label label)
void FakeDbInterface::handleSetMuxState(const std::string portName, mux_state::MuxState::Label label)
{
mLastSetMuxState = label;
mSetMuxStateInvokeCount++;

mDbInterfaceRaceConditionCheckFailure = false;
}

void FakeDbInterface::setPeerMuxState(const std::string &portName, mux_state::MuxState::Label label)
void FakeDbInterface::handleSetPeerMuxState(const std::string portName, mux_state::MuxState::Label label)
{
mLastSetPeerMuxState = label;
mSetPeerMuxStateInvokeCount++;
Expand All @@ -60,7 +62,7 @@ void FakeDbInterface::probeMuxState(const std::string &portName)
mProbeMuxStateInvokeCount++;
}

void FakeDbInterface::probeForwardingState(const std::string &portName)
void FakeDbInterface::handleProbeForwardingState(const std::string portName)
{
mProbeForwardingStateInvokeCount++;
}
Expand All @@ -70,13 +72,16 @@ void FakeDbInterface::setMuxLinkmgrState(const std::string &portName, link_manag
mSetMuxLinkmgrStateInvokeCount++;
}

void FakeDbInterface::postMetricsEvent(
const std::string &portName,
link_manager::ActiveStandbyStateMachine::Metrics metrics,
mux_state::MuxState::Label label
void FakeDbInterface::handlePostMuxMetrics(
const std::string portName,
link_manager::ActiveStandbyStateMachine::Metrics metrics,
mux_state::MuxState::Label label,
boost::posix_time::ptime time
)
{
mPostMetricsInvokeCount++;

mDbInterfaceRaceConditionCheckFailure = true;
}

void FakeDbInterface::postLinkProberMetricsEvent(
Expand Down
16 changes: 10 additions & 6 deletions test/FakeDbInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,20 @@ class FakeDbInterface: public mux::DbInterface
FakeDbInterface(mux::MuxManager *muxManager, boost::asio::io_service *ioService);
virtual ~FakeDbInterface() = default;

virtual void setMuxState(const std::string &portName, mux_state::MuxState::Label label) override;
virtual void setPeerMuxState(const std::string &portName, mux_state::MuxState::Label label) override;
virtual void handleSetMuxState(const std::string portName, mux_state::MuxState::Label label) override;
virtual void handleSetPeerMuxState(const std::string portName, mux_state::MuxState::Label label) override;
virtual void getMuxState(const std::string &portName) override;
virtual void probeMuxState(const std::string &portName) override;
virtual void probeForwardingState(const std::string &portName) override;
virtual void handleProbeForwardingState(const std::string portName) override;
virtual void setMuxLinkmgrState(
const std::string &portName,
link_manager::ActiveStandbyStateMachine::Label label
) override;
virtual void postMetricsEvent(
const std::string &portName,
virtual void handlePostMuxMetrics(
const std::string portName,
link_manager::ActiveStandbyStateMachine::Metrics metrics,
mux_state::MuxState::Label label
mux_state::MuxState::Label label,
boost::posix_time::ptime time
) override;
virtual void postLinkProberMetricsEvent(
const std::string &portName,
Expand Down Expand Up @@ -88,7 +89,10 @@ class FakeDbInterface: public mux::DbInterface
uint64_t mExpectedPacketCount = 0;
uint32_t mSetMuxModeInvokeCount = 0;
uint32_t mSetWarmStartStateReconciledInvokeCount = 0;

bool mWarmStartFlag = false;

bool mDbInterfaceRaceConditionCheckFailure = false;
};

} /* namespace test */
Expand Down
Loading

0 comments on commit 3f7a6f2

Please sign in to comment.