Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add failure handling for SAI get operations #1768

Merged
merged 12 commits into from
Jul 7, 2021
6 changes: 5 additions & 1 deletion orchagent/aclorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2309,7 +2309,11 @@ void AclOrch::init(vector<TableConnector>& connectors, PortsOrch *portOrch, Mirr
else
{
SWSS_LOG_ERROR("Failed to get ACL entry priority min/max values, rv:%d", status);
throw "AclOrch initialization failure";
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw "AclOrch initialization failure";
}
}

queryAclActionCapability();
Expand Down
6 changes: 5 additions & 1 deletion orchagent/copporch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,11 @@ void CoppOrch::initDefaultTrapGroup()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get default trap group, rv:%d", status);
throw "CoppOrch initialization failure";
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw "CoppOrch initialization failure";
}
}

SWSS_LOG_INFO("Get default trap group");
Expand Down
12 changes: 10 additions & 2 deletions orchagent/crmorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,11 @@ void CrmOrch::getResAvailableCounters()
break;
}
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
break;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
break;
}
}

res.second.countersMap[CRM_COUNTERS_TABLE_KEY].availableCounter = attr.value.u32;
Expand Down Expand Up @@ -517,7 +521,11 @@ void CrmOrch::getResAvailableCounters()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
break;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
break;
}
}

for (uint32_t i = 0; i < attr.value.aclresource.count; i++)
Expand Down
36 changes: 30 additions & 6 deletions orchagent/fabricportsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,11 @@ int FabricPortsOrch::getFabricPortList()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get fabric port number, rv:%d", status);
return FABRIC_PORT_ERROR;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
return FABRIC_PORT_ERROR;
}
}
m_fabricPortCount = attr.value.u32;
SWSS_LOG_NOTICE("Get %d fabric ports", m_fabricPortCount);
Expand All @@ -101,7 +105,11 @@ int FabricPortsOrch::getFabricPortList()
status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get port list failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get port list failure");
}
}

for (i = 0; i < m_fabricPortCount; i++)
Expand All @@ -113,7 +121,11 @@ int FabricPortsOrch::getFabricPortList()
status = sai_port_api->get_port_attribute(fabric_port_list[i], 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get port lane failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get port lane failure");
}
}
int lane = attr.value.u32list.list[0];
m_fabricLanePortMap[lane] = fabric_port_list[i];
Expand Down Expand Up @@ -198,7 +210,11 @@ void FabricPortsOrch::updateFabricPortState()
{
// Port may not be ready for query
SWSS_LOG_ERROR("Failed to get fabric port (%d) status, rv:%d", lane, status);
return;
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
return;
}
}

if (m_portStatus.find(lane) != m_portStatus.end() &&
Expand All @@ -215,15 +231,23 @@ void FabricPortsOrch::updateFabricPortState()
status = sai_port_api->get_port_attribute(port, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get remote id failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get remote id failure");
}
}
remote_peer = attr.value.u32;

attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_PORT_INDEX;
status = sai_port_api->get_port_attribute(port, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get remote port index failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get remote port index failure");
}
}
remote_port = attr.value.u32;
}
Expand Down
6 changes: 5 additions & 1 deletion orchagent/fdborch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,11 @@ bool FdbOrch::getPort(const MacAddress& mac, uint16_t vlan, Port& port)
{
SWSS_LOG_ERROR("Failed to get bridge port ID for FDB entry %s, rv:%d",
mac.to_string().c_str(), status);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_FDB, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}

if (!m_portsOrch->getPortByBridgePortId(attr.value.oid, port))
Expand Down
10 changes: 7 additions & 3 deletions orchagent/fgnhgorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,15 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout
{
SWSS_LOG_ERROR("Failed to query next hop group %s SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE, rv:%d",
nextHops.to_string().c_str(), status);
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
task_process_status handle_status = handleSaiGetStatus(SAI_API_NEXT_HOP_GROUP, status);
if (handle_status != task_process_status::task_success)
{
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
{
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
}
return false;
}
return false;
}
fgNhgEntry->real_bucket_size = nhg_attr.value.u32;
}
Expand Down
15 changes: 10 additions & 5 deletions orchagent/macsecorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -854,15 +854,20 @@ bool MACsecOrch::initMACsecObject(sai_object_id_t switch_id)
attrs.clear();
attr.id = SAI_MACSEC_ATTR_SCI_IN_INGRESS_MACSEC_ACL;
attrs.push_back(attr);
if (sai_macsec_api->get_macsec_attribute(
macsec_obj.first->second.m_ingress_id,
static_cast<uint32_t>(attrs.size()),
attrs.data()) != SAI_STATUS_SUCCESS)
status = sai_macsec_api->get_macsec_attribute(
macsec_obj.first->second.m_ingress_id,
static_cast<uint32_t>(attrs.size()),
attrs.data());
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_WARN(
"Cannot get MACsec attribution SAI_MACSEC_ATTR_SCI_IN_INGRESS_MACSEC_ACL at the switch 0x%" PRIx64,
switch_id);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_MACSEC, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}
macsec_obj.first->second.m_sci_in_ingress_macsec_acl = attrs.front().value.booldata;

Expand Down
6 changes: 5 additions & 1 deletion orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1547,7 +1547,11 @@ void NeighOrch::voqSyncAddNeigh(string &alias, IpAddress &ip_address, const MacA
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get neighbor attribute for %s on %s, rv:%d", ip_address.to_string().c_str(), alias.c_str(), status);
return;
task_process_status handle_status = handleSaiGetStatus(SAI_API_NEIGHBOR, status);
if (handle_status != task_process_status::task_success)
{
return;
}
}

if (!attr.value.u32)
Expand Down
29 changes: 29 additions & 0 deletions orchagent/orch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,35 @@ task_process_status Orch::handleSaiRemoveStatus(sai_api_t api, sai_status_t stat
return task_need_retry;
}

task_process_status Orch::handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context)
{
/*
* This function aims to provide coarse handling of failures in sairedis get
* operation (i.e., notify users by throwing excepions when failures happen).
* Return value: task_success - Handled the status successfully. No need to retry this SAI operation.
* task_need_retry - Cannot handle the status. Need to retry the SAI operation.
* task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure.
* TODO: 1. Add general handling logic for specific statuses
* 2. Develop fine-grain failure handling mechanisms and replace this coarse handling
* in each orch.
* 3. Take the type of sai api into consideration.
*/
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus");
return task_success;
case SAI_STATUS_NOT_IMPLEMENTED:
SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s",
sai_serialize_api(api).c_str());
throw std::logic_error("SAI get function not implemented");
default:
SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
}
return task_failed;
}

bool Orch::parseHandleSaiStatusFailure(task_process_status status)
{
/*
Expand Down
1 change: 1 addition & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ class Orch
virtual task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
bool parseHandleSaiStatusFailure(task_process_status status);
private:
void removeMeFromObjsReferencedByMe(type_map &type_maps, const std::string &table, const std::string &obj_name, const std::string &field, const std::string &old_referenced_obj_name);
Expand Down
Loading