Skip to content

Commit

Permalink
Fix race condition caused by strand wrap method (#104) (#110)
Browse files Browse the repository at this point in the history
Description of PR
Summary:
Fixes # (issue)
This PR is to fix race condition introduced by wrap method of boost::asio::io_service::strand:

In the following case:

async_op_1(..., s.wrap(a));
async_op_2(..., s.wrap(b));

the completion of the first async operation will perform s.dispatch(a), and the second will perform s.dispatch(b), but the order in which those are performed is unspecified. That is, you cannot state whether one happens-before the other. Therefore, none of the above conditions are met and no ordering guarantee is made.

sign-off: Jing Zhang zhangjing@microsoft.com

Type of change
 Bug fix
 New feature
 Doc/Design
 Unit test
Approach
What is the motivation for this PR?
To fix the bug that linkgmrd cleans mux metrics table after setting mux state to orchagent.

How did you do it?
Use boost::asio::post() to replace wrap.
Add unit tests to cover this race condition scenario:
Remove override of setMuxState, postMetricsEvent in FakeDbinterface so the real implementation can be executed.
Override setMuxState, postMetricsEvent in FakeMuxPort as LinkManagerStateMachine tests use fake mux ports, so existing tests won't be broken.
Initiate thread pool in MuxManager test to better mimic, and run 1000 times setMuxState & postMetricsEvent to reproduce.
How did you verify/test it?
Unit tests. Verified:

With wrap, issue was reproduced 14/1000 times.
[  FAILED  ] 1 test, listed below:
[  FAILED  ] MuxManagerTest.DbInterfaceRaceConditionCheck
test/MuxManagerTest.cpp:1014: Failure
Value of: mDbInterfacePtr->mDbInterfaceRaceConditionCheckFailure
  Actual: true
Expected: false
...
With boost::asio::post(), issue was not reproducible.
  • Loading branch information
zjswhhh authored Aug 12, 2022
1 parent f68a03e commit 86ddd95
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 68 deletions.
40 changes: 16 additions & 24 deletions src/DbInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,11 @@ void DbInterface::getMuxState(const std::string &portName)
{
MUXLOGDEBUG(portName);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleGetMuxState,
this,
portName
)));
));
}

//
Expand All @@ -87,13 +86,12 @@ void DbInterface::setMuxState(const std::string &portName, mux_state::MuxState::
{
MUXLOGDEBUG(boost::format("%s: setting mux to %s") % portName % mMuxState[label]);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleSetMuxState,
this,
portName,
label
)));
));
}

//
Expand All @@ -105,12 +103,11 @@ void DbInterface::probeMuxState(const std::string &portName)
{
MUXLOGDEBUG(portName);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleProbeMuxState,
this,
portName
)));
));
}

//
Expand All @@ -122,13 +119,12 @@ void DbInterface::setMuxLinkmgrState(const std::string &portName, link_manager::
{
MUXLOGDEBUG(boost::format("%s: setting mux linkmgr to %s") % portName % mMuxLinkmgrState[static_cast<int> (label)]);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handleSetMuxLinkmgrState,
this,
portName,
label
)));
));
}

//
Expand All @@ -151,15 +147,14 @@ void DbInterface::postMetricsEvent(
mMuxMetrics[static_cast<int> (metrics)]
);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handlePostMuxMetrics,
this,
portName,
metrics,
label,
boost::posix_time::microsec_clock::universal_time()
)));
));
}

//
Expand All @@ -179,14 +174,13 @@ void DbInterface::postLinkProberMetricsEvent(
mLinkProbeMetrics[static_cast<int> (metrics)]
);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&DbInterface::handlePostLinkProberMetrics,
this,
portName,
metrics,
boost::posix_time::microsec_clock::universal_time()
)));
));
}

//
Expand All @@ -208,14 +202,13 @@ void DbInterface::postPckLossRatio(
expectedPacketCount
);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand,boost::bind(
&DbInterface::handlePostPckLossRatio,
this,
portName,
unknownEventCount,
expectedPacketCount
)));
));
}

//
Expand Down Expand Up @@ -608,13 +601,12 @@ void DbInterface::setMuxMode(const std::string &portName, const std::string stat
{
MUXLOGDEBUG(portName);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand,boost::bind(
&DbInterface::handleSetMuxMode,
this,
portName,
state
)));
));
}

//
Expand Down
66 changes: 33 additions & 33 deletions src/DbInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,19 @@ class DbInterface
*
*@return none
*/
virtual void setMuxState(const std::string &portName, mux_state::MuxState::Label label);
void setMuxState(const std::string &portName, mux_state::MuxState::Label label);

/**
*@method handleSetMuxState
*
*@brief set MUX state in APP DB for orchagent processing
*
*@param portName (in) MUX/port name
*@param label (in) label of target state
*
*@return none
*/
virtual void handleSetMuxState(const std::string portName, mux_state::MuxState::Label label);

/**
*@method probeMuxState
Expand Down Expand Up @@ -168,12 +180,31 @@ class DbInterface
*
*@return none
*/
virtual void postMetricsEvent(
void postMetricsEvent(
const std::string &portName,
link_manager::LinkManagerStateMachine::Metrics metrics,
mux_state::MuxState::Label label
);

/**
*@method handlePostMuxMetrics
*
*@brief set MUX metrics to state db
*
*@param portName (in) MUX/port name
*@param metrics (in) metrics data
*@param label (in) label of target state
*@param time (in) current time
*
*@return none
*/
virtual void handlePostMuxMetrics(
const std::string portName,
link_manager::LinkManagerStateMachine::Metrics metrics,
mux_state::MuxState::Label label,
boost::posix_time::ptime time
);

/**
* @method postLinkProberMetricsEvent
*
Expand Down Expand Up @@ -310,18 +341,6 @@ class DbInterface
*/
void handleGetMuxState(const std::string portName);

/**
*@method handleSetMuxState
*
*@brief set MUX state in APP DB for orchagent processing
*
*@param portName (in) MUX/port name
*@param label (in) label of target state
*
*@return none
*/
void handleSetMuxState(const std::string portName, mux_state::MuxState::Label label);

/**
*@method handleProbeMuxState
*
Expand All @@ -345,25 +364,6 @@ class DbInterface
*/
void handleSetMuxLinkmgrState(const std::string portName, link_manager::LinkManagerStateMachine::Label label);

/**
*@method handlePostMuxMetrics
*
*@brief set MUX metrics to state db
*
*@param portName (in) MUX/port name
*@param metrics (in) metrics data
*@param label (in) label of target state
*@param time (in) current time
*
*@return none
*/
void handlePostMuxMetrics(
const std::string portName,
link_manager::LinkManagerStateMachine::Metrics metrics,
mux_state::MuxState::Label label,
boost::posix_time::ptime time
);

/**
* @method handlePostLinkProberMetrics
*
Expand Down
4 changes: 2 additions & 2 deletions src/MuxPort.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
*
*@return none
*/
inline void setMuxState(mux_state::MuxState::Label label) {mDbInterfacePtr->setMuxState(mMuxPortConfig.getPortName(), label);};
virtual inline void setMuxState(mux_state::MuxState::Label label) {mDbInterfacePtr->setMuxState(mMuxPortConfig.getPortName(), label);};

/**
*@method getMuxState
Expand Down Expand Up @@ -157,7 +157,7 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
*
*@return none
*/
inline void postMetricsEvent(
virtual inline void postMetricsEvent(
link_manager::LinkManagerStateMachine::Metrics metrics,
mux_state::MuxState::Label label
) {
Expand Down
15 changes: 10 additions & 5 deletions test/FakeDbInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ FakeDbInterface::FakeDbInterface(mux::MuxManager *muxManager, boost::asio::io_se
{
}

void FakeDbInterface::setMuxState(const std::string &portName, mux_state::MuxState::Label label)
void FakeDbInterface::handleSetMuxState(const std::string portName, mux_state::MuxState::Label label)
{
mSetMuxStateInvokeCount++;

mDbInterfaceRaceConditionCheckFailure = false;
}

void FakeDbInterface::getMuxState(const std::string &portName)
Expand All @@ -58,13 +60,16 @@ void FakeDbInterface::setMuxLinkmgrState(const std::string &portName, link_manag
mSetMuxLinkmgrStateInvokeCount++;
}

void FakeDbInterface::postMetricsEvent(
const std::string &portName,
link_manager::LinkManagerStateMachine::Metrics metrics,
mux_state::MuxState::Label label
void FakeDbInterface::handlePostMuxMetrics(
const std::string portName,
link_manager::LinkManagerStateMachine::Metrics metrics,
mux_state::MuxState::Label label,
boost::posix_time::ptime time
)
{
mPostMetricsInvokeCount++;

mDbInterfaceRaceConditionCheckFailure = true;
}

void FakeDbInterface::postLinkProberMetricsEvent(
Expand Down
12 changes: 8 additions & 4 deletions test/FakeDbInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,18 @@ class FakeDbInterface: public mux::DbInterface
FakeDbInterface(mux::MuxManager *muxManager, boost::asio::io_service *ioService);
virtual ~FakeDbInterface() = default;

virtual void setMuxState(const std::string &portName, mux_state::MuxState::Label label) override;
virtual void handleSetMuxState(const std::string portName, mux_state::MuxState::Label label) override;
virtual void getMuxState(const std::string &portName) override;
virtual void probeMuxState(const std::string &portName) override;
virtual void setMuxLinkmgrState(
const std::string &portName,
link_manager::LinkManagerStateMachine::Label label
) override;
virtual void postMetricsEvent(
const std::string &portName,
virtual void handlePostMuxMetrics(
const std::string portName,
link_manager::LinkManagerStateMachine::Metrics metrics,
mux_state::MuxState::Label label
mux_state::MuxState::Label label,
boost::posix_time::ptime time
) override;
virtual void postLinkProberMetricsEvent(
const std::string &portName,
Expand Down Expand Up @@ -81,7 +82,10 @@ class FakeDbInterface: public mux::DbInterface
uint64_t mExpectedPacketCount = 0;
uint32_t mSetMuxModeInvokeCount = 0;
uint32_t mSetWarmStartStateReconciledInvokeCount = 0;

bool mWarmStartFlag = false;

bool mDbInterfaceRaceConditionCheckFailure = false;
};

} /* namespace test */
Expand Down
8 changes: 8 additions & 0 deletions test/FakeMuxPort.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ class FakeMuxPort: public ::mux::MuxPort

void activateStateMachine();

virtual inline void postMetricsEvent(
link_manager::LinkManagerStateMachine::Metrics metrics,
mux_state::MuxState::Label label
) {
mDbInterfacePtr->handlePostMuxMetrics(mMuxPortConfig.getPortName(), metrics, label, boost::posix_time::microsec_clock::universal_time());
};
virtual inline void setMuxState(mux_state::MuxState::Label label) {mDbInterfacePtr->handleSetMuxState(mMuxPortConfig.getPortName(), label);};

const link_manager::LinkManagerStateMachine::CompositeState& getCompositeState() {return getLinkManagerStateMachine()->getCompositeState();};
link_prober::LinkProberStateMachine& getLinkProberStateMachine() {return getLinkManagerStateMachine()->getLinkProberStateMachine();};
mux_state::MuxStateMachine& getMuxStateMachine() {return getLinkManagerStateMachine()->getMuxStateMachine();};
Expand Down
53 changes: 53 additions & 0 deletions test/MuxManagerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,36 @@ void MuxManagerTest::startWarmRestartReconciliationTimer(uint32_t timeout)
);
}

void MuxManagerTest::postMetricsEvent(const std::string &portName, mux_state::MuxState::Label label)
{
std::shared_ptr<mux::MuxPort> muxPortPtr = mMuxManagerPtr->mPortMap[portName];

return muxPortPtr->postMetricsEvent(link_manager::LinkManagerStateMachine::Metrics::SwitchingStart, label);
}

void MuxManagerTest::setMuxState(const std::string &portName, mux_state::MuxState::Label label)
{
std::shared_ptr<mux::MuxPort> muxPortPtr = mMuxManagerPtr->mPortMap[portName];

return muxPortPtr->setMuxState(label);
}

void MuxManagerTest::initializeThread()
{
for (uint8_t i = 0; i < 3; i++) {
mMuxManagerPtr->mThreadGroup.create_thread(
boost::bind(&boost::asio::io_service::run, &(mMuxManagerPtr->getIoService()))
);
}
}

void MuxManagerTest::terminate()
{
mMuxManagerPtr->getIoService().stop();
mMuxManagerPtr->mWork.~work(); // destructor is used to inform work is finished, only then run() is permitted to exit.
mMuxManagerPtr->mThreadGroup.join_all();
}

void MuxManagerTest::createPort(std::string port)
{
EXPECT_TRUE(mMuxManagerPtr->mPortMap.size() == 0);
Expand Down Expand Up @@ -540,4 +570,27 @@ TEST_F(MuxManagerTest, WarmRestartTimeout)
EXPECT_EQ(mDbInterfacePtr->mSetWarmStartStateReconciledInvokeCount, 1);
}

TEST_F(MuxManagerTest, DbInterfaceRaceConditionCheck)
{
createPort("Ethernet0");

// create thread pool
initializeThread();

uint32_t TOGGLE_COUNT = 1000;

for (uint32_t i=0; i<TOGGLE_COUNT; i++) {
postMetricsEvent("Ethernet0", mux_state::MuxState::Label::Active);
setMuxState("Ethernet0", mux_state::MuxState::Label::Active);

// wait for handler to be completed
usleep(1000);
EXPECT_FALSE(mDbInterfacePtr->mDbInterfaceRaceConditionCheckFailure);
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, i+1);
EXPECT_EQ(mDbInterfacePtr->mPostMetricsInvokeCount, i+1);
}

terminate();
}

} /* namespace test */
Loading

0 comments on commit 86ddd95

Please sign in to comment.