Skip to content

Commit

Permalink
[active-standby] Fix show mux status inconsistency introduced by or…
Browse files Browse the repository at this point in the history
…chagent rollback (#225)

Approach
What is the motivation for this PR?
This is to fix the show mux status inconsistency introduced by orchagent roll back.

In mux port state machine design, linkmgrd honors hardware state for active-standby ports, and never intends to trigger a secondary toggle when everything is healthy. But after we introduce orchagent rollback, show mux status can return unmatched APP_DB and STATE_DB entries for this, which blocks upgrade.

Hence, submitting this PR as a workaround.

sign-off: Jing Zhang [email protected]

Work item tracking
Microsoft ADO (number only):
26136887

How did you do it?
How did you verify/test it?
  • Loading branch information
zjswhhh authored and mssonicbld committed Feb 2, 2024
1 parent 74e455e commit 70b6d15
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 3 deletions.
7 changes: 5 additions & 2 deletions src/link_manager/LinkManagerStateMachineActiveStandby.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,8 @@ void ActiveStandbyStateMachine::switchMuxState(
mWaitActiveUpCount = 0;
}

mLastSetMuxState = label;

enterMuxState(nextState, mux_state::MuxState::Label::Wait);
mMuxStateMachine.setWaitStateCause(mux_state::WaitState::WaitStateCause::SwssUpdate);
mMuxPortPtr->postMetricsEvent(Metrics::SwitchingStart, label);
Expand Down Expand Up @@ -621,10 +623,11 @@ void ActiveStandbyStateMachine::handleGetMuxStateNotification(mux_state::MuxStat
{
MUXLOGINFO(boost::format("%s: state db mux state: %s") % mMuxPortConfig.getPortName() % mMuxStateName[label]);

if (mComponentInitState.all() && ms(mCompositeState) != label &&
if (mComponentInitState.all() &&
ms(mCompositeState) != mux_state::MuxState::Wait &&
ms(mCompositeState) != mux_state::MuxState::Error &&
ms(mCompositeState) != mux_state::MuxState::Unknown) {
ms(mCompositeState) != mux_state::MuxState::Unknown &&
(ms(mCompositeState) != label || mLastSetMuxState != label)) {
// notify swss of mux state change
MUXLOGWARNING(boost::format("%s: Switching MUX state from '%s' to '%s' to match linkmgrd/xcvrd state") %
mMuxPortConfig.getPortName() %
Expand Down
2 changes: 2 additions & 0 deletions src/link_manager/LinkManagerStateMachineActiveStandby.h
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,8 @@ class ActiveStandbyStateMachine: public LinkManagerStateMachineBase,
bool mPendingMuxModeChange = false;
common::MuxPortConfig::Mode mTargetMuxMode = common::MuxPortConfig::Mode::Auto;

mux_state::MuxState::Label mLastSetMuxState = mux_state::MuxState::Label::Wait;

bool mContinuousLinkProberUnknownEvent = false; // When posting unknown_end event, we want to make sure the previous state is unknown.

link_manager::ActiveStandbyStateMachine::SwitchCause mSendSwitchActiveCommandCause;
Expand Down
28 changes: 28 additions & 0 deletions test/LinkManagerStateMachineTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1465,6 +1465,34 @@ TEST_F(LinkManagerStateMachineTest, TimedOscillation)
mMuxConfig.setTimeoutIpv4_msec(10);
}

TEST_F(LinkManagerStateMachineTest, OrchagentRollback)
{
setMuxStandby();

EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);

handleMuxConfig("active", 4);
VALIDATE_STATE(Wait, Wait, Up);
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 1);

// orchagent rollback, toggle is not successful.
postLinkProberEvent(link_prober::LinkProberState::Standby, 3);
VALIDATE_STATE(Standby, Wait, Up);
handleMuxState("standby", 3);
VALIDATE_STATE(Standby, Standby, Up);

// extra toggle to make app_db entry in sync with state_db
handleGetMuxState("standby", 2);
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 2);
VALIDATE_STATE(Standby, Wait, Up);

handleMuxState("standby", 3);
handleGetMuxState("standby", 2);
VALIDATE_STATE(Standby, Standby, Up);

// no 3rd toggle
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 2);
}

TEST_F(LinkManagerStateMachineTest, ProbeLinkInSuspendTimeout)
{
Expand Down
2 changes: 1 addition & 1 deletion test/MuxManagerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,7 @@ INSTANTIATE_TEST_CASE_P(
MuxState,
GetMuxStateTest,
::testing::Values(
std::make_tuple("active", mux_state::MuxState::Label::Active),
std::make_tuple("active", mux_state::MuxState::Label::Wait),
std::make_tuple("standby", mux_state::MuxState::Label::Wait),
std::make_tuple("unknown", mux_state::MuxState::Label::Wait),
std::make_tuple("error", mux_state::MuxState::Label::Wait)
Expand Down

0 comments on commit 70b6d15

Please sign in to comment.