From fa57f4330d9bbd626d99b08400d8da5d24e11d9e Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 5 Jul 2024 12:50:59 -0400 Subject: [PATCH 01/27] naively set node state unhealthy on error --- resource/graph_node.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index eb92593f4fa..e77992e5379 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -26,12 +26,14 @@ const ( // NodeStateConfiguring denotes a resource is being configured. NodeStateConfiguring - // NodeStateReady denotes a resource that has been initialized and is not being - // configured or removed. A resource in this state may be unhealthy. + // NodeStateReady denotes a resource that has been configured and is healthy. NodeStateReady // NodeStateRemoving denotes a resource is being removed from the resource graph. NodeStateRemoving + + // NodeStateUnhealthy denotes a resource is unhealthy. + NodeStateUnhealthy ) // A GraphNode contains the current state of a resource. @@ -276,9 +278,10 @@ func (w *GraphNode) MarkedForRemoval() bool { // The additional `args` should come in key/value pairs for structured logging. func (w *GraphNode) LogAndSetLastError(err error, args ...any) { w.mu.Lock() + defer w.mu.Unlock() + w.lastErr = err - // TODO(RSDK-7903): transition to an "unhealthy" state. - w.mu.Unlock() + w.transitionTo(NodeStateUnhealthy) if w.logger != nil { w.logger.Errorw(err.Error(), args...) From 3592bc8fbd11db3370bd2efc85a4e81f8d2197b5 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Thu, 1 Aug 2024 17:41:57 -0400 Subject: [PATCH 02/27] keep last state when unhealthy --- resource/graph_node.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index e77992e5379..331d0308c99 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -36,6 +36,15 @@ const ( NodeStateUnhealthy ) +type GraphNodeError struct { + err error + lastState NodeState +} + +func (wErr *GraphNodeError) Error() string { + return wErr.err.Error() +} + // A GraphNode contains the current state of a resource. // Based on these states, the underlying Resource may or may not be available. // Additionally, the node can be informed that the resource either needs to be @@ -60,7 +69,7 @@ type GraphNode struct { // GraphNode was constructed or last reconfigured. It returns nil if the GraphNode is // unconfigured. lastReconfigured *time.Time - lastErr error + lastErr *GraphNodeError unresolvedDependencies []string needsDependencyResolution bool @@ -280,7 +289,7 @@ func (w *GraphNode) LogAndSetLastError(err error, args ...any) { w.mu.Lock() defer w.mu.Unlock() - w.lastErr = err + w.lastErr = &GraphNodeError{err: err, lastState: w.state} w.transitionTo(NodeStateUnhealthy) if w.logger != nil { @@ -302,6 +311,9 @@ func (w *GraphNode) Config() Config { func (w *GraphNode) NeedsReconfigure() bool { w.mu.RLock() defer w.mu.RUnlock() + if w.state == NodeStateUnhealthy { + return w.lastErr.lastState == NodeStateConfiguring + } return w.state == NodeStateConfiguring } From 9751ccd7901a0bd587b69b9f656874fff5d2a0dd Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 01:00:22 -0400 Subject: [PATCH 03/27] update status test --- robot/impl/local_robot_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/robot/impl/local_robot_test.go b/robot/impl/local_robot_test.go index 42ea460fe23..8fc92758229 100644 --- a/robot/impl/local_robot_test.go +++ b/robot/impl/local_robot_test.go @@ -3724,7 +3724,7 @@ func TestMachineStatus(t *testing.T) { []resource.Status{ { Name: mockNamed("m"), - State: resource.NodeStateConfiguring, + State: resource.NodeStateUnhealthy, Revision: rev2, }, }, From e2e139df032a9f05e3d82f257156231feaccf2b3 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 01:08:52 -0400 Subject: [PATCH 04/27] marked for removal --- resource/graph_node.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/resource/graph_node.go b/resource/graph_node.go index 331d0308c99..04ae9772a8f 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -277,6 +277,9 @@ func (w *GraphNode) MarkForRemoval() { func (w *GraphNode) MarkedForRemoval() bool { w.mu.Lock() defer w.mu.Unlock() + if w.state == NodeStateUnhealthy { + return w.lastErr.lastState == NodeStateRemoving + } return w.state == NodeStateRemoving } From a5d2eef61e2f2c7a6178fa98235da8b69b275313 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 01:12:19 -0400 Subject: [PATCH 05/27] TODOs --- resource/graph_node.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index 04ae9772a8f..b97847a3337 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -270,6 +270,7 @@ func (w *GraphNode) SwapResource(newRes Resource, newModel Model) { func (w *GraphNode) MarkForRemoval() { w.mu.Lock() defer w.mu.Unlock() + // TODO: what if node is unhealthy? w.transitionTo(NodeStateRemoving) } @@ -292,8 +293,11 @@ func (w *GraphNode) LogAndSetLastError(err error, args ...any) { w.mu.Lock() defer w.mu.Unlock() - w.lastErr = &GraphNodeError{err: err, lastState: w.state} - w.transitionTo(NodeStateUnhealthy) + // TODO: what should we do if the node is already unhealthy? + if w.state != NodeStateUnhealthy { + w.lastErr = &GraphNodeError{err: err, lastState: w.state} + w.transitionTo(NodeStateUnhealthy) + } if w.logger != nil { w.logger.Errorw(err.Error(), args...) @@ -331,6 +335,7 @@ func (w *GraphNode) hasUnresolvedDependencies() bool { func (w *GraphNode) setNeedsReconfigure(newConfig Config, mustReconfigure bool, dependencies []string) { w.mu.Lock() defer w.mu.Unlock() + // TODO what if the node is unhealthy here? if !mustReconfigure && w.state == NodeStateRemoving { // This is the case where the node is being asked to update // with no new config but it was marked for removal otherwise. From be10ef00695f512ee961c0f694dd868330e36e86 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 13:49:50 -0400 Subject: [PATCH 06/27] simplify unhealthy state --- resource/graph_node.go | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index b97847a3337..63b2e48e277 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -7,6 +7,7 @@ import ( "time" "github.com/pkg/errors" + "go.uber.org/multierr" "go.viam.com/rdk/logging" ) @@ -36,15 +37,6 @@ const ( NodeStateUnhealthy ) -type GraphNodeError struct { - err error - lastState NodeState -} - -func (wErr *GraphNodeError) Error() string { - return wErr.err.Error() -} - // A GraphNode contains the current state of a resource. // Based on these states, the underlying Resource may or may not be available. // Additionally, the node can be informed that the resource either needs to be @@ -69,7 +61,7 @@ type GraphNode struct { // GraphNode was constructed or last reconfigured. It returns nil if the GraphNode is // unconfigured. lastReconfigured *time.Time - lastErr *GraphNodeError + lastErr error unresolvedDependencies []string needsDependencyResolution bool @@ -270,7 +262,6 @@ func (w *GraphNode) SwapResource(newRes Resource, newModel Model) { func (w *GraphNode) MarkForRemoval() { w.mu.Lock() defer w.mu.Unlock() - // TODO: what if node is unhealthy? w.transitionTo(NodeStateRemoving) } @@ -278,9 +269,6 @@ func (w *GraphNode) MarkForRemoval() { func (w *GraphNode) MarkedForRemoval() bool { w.mu.Lock() defer w.mu.Unlock() - if w.state == NodeStateUnhealthy { - return w.lastErr.lastState == NodeStateRemoving - } return w.state == NodeStateRemoving } @@ -293,11 +281,8 @@ func (w *GraphNode) LogAndSetLastError(err error, args ...any) { w.mu.Lock() defer w.mu.Unlock() - // TODO: what should we do if the node is already unhealthy? - if w.state != NodeStateUnhealthy { - w.lastErr = &GraphNodeError{err: err, lastState: w.state} - w.transitionTo(NodeStateUnhealthy) - } + w.lastErr = multierr.Append(w.lastErr, err) + w.transitionTo(NodeStateUnhealthy) if w.logger != nil { w.logger.Errorw(err.Error(), args...) @@ -318,10 +303,7 @@ func (w *GraphNode) Config() Config { func (w *GraphNode) NeedsReconfigure() bool { w.mu.RLock() defer w.mu.RUnlock() - if w.state == NodeStateUnhealthy { - return w.lastErr.lastState == NodeStateConfiguring - } - return w.state == NodeStateConfiguring + return w.state == NodeStateConfiguring || w.state == NodeStateUnhealthy } // hasUnresolvedDependencies returns whether or not this node has any @@ -335,7 +317,6 @@ func (w *GraphNode) hasUnresolvedDependencies() bool { func (w *GraphNode) setNeedsReconfigure(newConfig Config, mustReconfigure bool, dependencies []string) { w.mu.Lock() defer w.mu.Unlock() - // TODO what if the node is unhealthy here? if !mustReconfigure && w.state == NodeStateRemoving { // This is the case where the node is being asked to update // with no new config but it was marked for removal otherwise. @@ -494,7 +475,7 @@ func (w *GraphNode) canTransitionTo(state NodeState) bool { case NodeStateConfiguring: //nolint switch state { - case NodeStateReady: + case NodeStateReady, NodeStateUnhealthy: return true } case NodeStateReady: @@ -504,6 +485,12 @@ func (w *GraphNode) canTransitionTo(state NodeState) bool { return true } case NodeStateRemoving: + case NodeStateUnhealthy: + //nolint + switch state { + case NodeStateConfiguring, NodeStateReady, NodeStateRemoving, NodeStateUnhealthy: + return true + } } return false } From b0f7d169ceadabc0991d0269efbc3459995fd34a Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 13:54:37 -0400 Subject: [PATCH 07/27] RSDK-6875: use std lib errors --- resource/graph_node.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index 63b2e48e277..c846c89ecb2 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -2,13 +2,12 @@ package resource import ( "context" + "errors" "sync" + "sync/atomic" "time" - "github.com/pkg/errors" - "go.uber.org/multierr" - "go.viam.com/rdk/logging" ) @@ -281,7 +280,7 @@ func (w *GraphNode) LogAndSetLastError(err error, args ...any) { w.mu.Lock() defer w.mu.Unlock() - w.lastErr = multierr.Append(w.lastErr, err) + w.lastErr = errors.Join(w.lastErr, err) w.transitionTo(NodeStateUnhealthy) if w.logger != nil { From 11f6031f429ef4114f84c32105ed386c234e5bdd Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 14:56:04 -0400 Subject: [PATCH 08/27] placeholder --- robot/server/server.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/robot/server/server.go b/robot/server/server.go index ae86ce6ca6d..997c34f7ef6 100644 --- a/robot/server/server.go +++ b/robot/server/server.go @@ -6,6 +6,7 @@ package server import ( "bytes" "context" + "fmt" "strings" "time" @@ -523,6 +524,8 @@ func (s *Server) GetMachineStatus(ctx context.Context, _ *pb.GetMachineStatusReq pbResStatus.State = pb.ResourceStatus_STATE_READY case resource.NodeStateRemoving: pbResStatus.State = pb.ResourceStatus_STATE_REMOVING + case resource.NodeStateUnhealthy: + panic(fmt.Sprintf("unexpected resource.NodeState: %#v", resStatus.State)) } result.Resources = append(result.Resources, pbResStatus) From 498ea0a01b8b04488255ad7ef65ce15f0f92745f Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 15:03:49 -0400 Subject: [PATCH 09/27] generate unhealthy state string --- resource/graph_node.go | 1 - resource/nodestate_string.go | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index c846c89ecb2..bfa58b6af3a 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -4,7 +4,6 @@ import ( "context" "errors" "sync" - "sync/atomic" "time" diff --git a/resource/nodestate_string.go b/resource/nodestate_string.go index 4955d45e628..24502c6aa9c 100644 --- a/resource/nodestate_string.go +++ b/resource/nodestate_string.go @@ -13,11 +13,12 @@ func _() { _ = x[NodeStateConfiguring-2] _ = x[NodeStateReady-3] _ = x[NodeStateRemoving-4] + _ = x[NodeStateUnhealthy-5] } -const _NodeState_name = "UnknownUnconfiguredConfiguringReadyRemoving" +const _NodeState_name = "UnknownUnconfiguredConfiguringReadyRemovingUnhealthy" -var _NodeState_index = [...]uint8{0, 7, 19, 30, 35, 43} +var _NodeState_index = [...]uint8{0, 7, 19, 30, 35, 43, 52} func (i NodeState) String() string { if i >= NodeState(len(_NodeState_index)-1) { From 0d2c4859edc426b3d0677e6a8dda600003e41dcb Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 16:01:15 -0400 Subject: [PATCH 10/27] update resource tests --- resource/graph_node_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/resource/graph_node_test.go b/resource/graph_node_test.go index 6d16632b109..af6b150f4e6 100644 --- a/resource/graph_node_test.go +++ b/resource/graph_node_test.go @@ -147,9 +147,9 @@ func lifecycleTest(t *testing.T, node *resource.GraphNode, initialDeps []string) node.LogAndSetLastError(ourErr) _, err = node.Resource() test.That(t, err, test.ShouldNotBeNil) - test.That(t, err.Error(), test.ShouldContainSubstring, "pending removal") + test.That(t, err.Error(), test.ShouldContainSubstring, "whoops") - verifySameState(t, node) + verifyStateTransition(t, node, resource.NodeStateUnhealthy) test.That(t, node.UnresolvedDependencies(), test.ShouldResemble, initialDeps) @@ -185,7 +185,7 @@ func lifecycleTest(t *testing.T, node *resource.GraphNode, initialDeps []string) test.That(t, res, test.ShouldEqual, ourRes) test.That(t, node.IsUninitialized(), test.ShouldBeFalse) - verifySameState(t, node) + verifyStateTransition(t, node, resource.NodeStateUnhealthy) // it reconfigured ourRes2 := &someResource{Resource: testutils.NewUnimplementedResource(generic.Named("foo"))} @@ -226,7 +226,7 @@ func lifecycleTest(t *testing.T, node *resource.GraphNode, initialDeps []string) res, err = node.UnsafeResource() test.That(t, err, test.ShouldBeNil) test.That(t, res, test.ShouldEqual, ourRes2) - verifySameState(t, node) + verifyStateTransition(t, node, resource.NodeStateUnhealthy) // it reconfigured ourRes3 := &someResource{Resource: testutils.NewUnimplementedResource(generic.Named("fooa"))} From 5c03dea0a8b2f9eb0faaa85eef5bcfe806242bdf Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 17:18:19 -0400 Subject: [PATCH 11/27] bump api --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 7bc5164ef49..1cd3d6515df 100644 --- a/go.mod +++ b/go.mod @@ -85,7 +85,7 @@ require ( go.uber.org/atomic v1.10.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.24.0 - go.viam.com/api v0.1.330 + go.viam.com/api v0.1.331 go.viam.com/test v1.1.1-0.20220913152726-5da9916c08a2 go.viam.com/utils v0.1.90 goji.io v2.0.2+incompatible diff --git a/go.sum b/go.sum index eacb7e2c790..2c21b2f32e7 100644 --- a/go.sum +++ b/go.sum @@ -1539,8 +1539,8 @@ go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.23.0/go.mod h1:D+nX8jyLsMHMYrln8A0rJjFt/T/9/bGgIhAqxv5URuY= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= -go.viam.com/api v0.1.330 h1:U0GAFU6SNOtwEYnpONDAhSyeyrQ/4Z2LUeTc0+Kzjp0= -go.viam.com/api v0.1.330/go.mod h1:msa4TPrMVeRDcG4YzKA/S6wLEUC7GyHQE973JklrQ10= +go.viam.com/api v0.1.331 h1:79ggmTwRuKn92yi/fhTrw+ehHGb7yw5rrrZRtZ50Ih4= +go.viam.com/api v0.1.331/go.mod h1:msa4TPrMVeRDcG4YzKA/S6wLEUC7GyHQE973JklrQ10= go.viam.com/test v1.1.1-0.20220913152726-5da9916c08a2 h1:oBiK580EnEIzgFLU4lHOXmGAE3MxnVbeR7s1wp/F3Ps= go.viam.com/test v1.1.1-0.20220913152726-5da9916c08a2/go.mod h1:XM0tej6riszsiNLT16uoyq1YjuYPWlRBweTPRDanIts= go.viam.com/utils v0.1.90 h1:Bja+mtI3Cneu3lFb/7V98YBW828/5+d1oChulQC0FZY= From d90c854156ecaf3d2bea56a9cc46111036e08c84 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Fri, 2 Aug 2024 18:05:51 -0400 Subject: [PATCH 12/27] update client+server --- resource/graph_node.go | 10 ++++++++++ robot/client/client.go | 5 +++++ robot/client/client_test.go | 15 +++++++++++++++ robot/server/server.go | 6 ++++-- robot/server/server_test.go | 24 ++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index bfa58b6af3a..744b3ec643c 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -526,11 +526,17 @@ func (w *GraphNode) resourceStatus() Status { resName = w.current.Name() } + var err error + if w.state == NodeStateUnhealthy { + err = w.lastErr + } + return Status{ Name: resName, State: w.state, LastUpdated: w.transitionedAt, Revision: w.revision, + Error: err, } } @@ -540,4 +546,8 @@ type Status struct { State NodeState LastUpdated time.Time Revision string + + // Error contains any errors on the resource if it currently unhealthy. + // This field will be nil if the resource is not in the [NodeStateUnhealthy] state. + Error error } diff --git a/robot/client/client.go b/robot/client/client.go index 29327028b5d..d38cc0d2943 100644 --- a/robot/client/client.go +++ b/robot/client/client.go @@ -1063,6 +1063,11 @@ func (rc *RobotClient) MachineStatus(ctx context.Context) (robot.MachineStatus, resStatus.State = resource.NodeStateReady case pb.ResourceStatus_STATE_REMOVING: resStatus.State = resource.NodeStateRemoving + case pb.ResourceStatus_STATE_UNHEALTHY: + resStatus.State = resource.NodeStateUnhealthy + if pbResStatus.Error != "" { + resStatus.Error = errors.New(pbResStatus.Error) + } } mStatus.Resources = append(mStatus.Resources, resStatus) diff --git a/robot/client/client_test.go b/robot/client/client_test.go index 8a18c898f81..ca8b09628c4 100644 --- a/robot/client/client_test.go +++ b/robot/client/client_test.go @@ -2158,6 +2158,21 @@ func TestMachineStatus(t *testing.T) { }, 2, }, + { + "unhealthy status", + robot.MachineStatus{ + Config: config.Revision{Revision: "rev1"}, + Resources: []resource.Status{ + { + Name: arm.Named("brokenArm"), + State: resource.NodeStateUnhealthy, + Error: errors.New("bad configuration"), + Revision: "rev1", + }, + }, + }, + 0, + }, } { t.Run(tc.name, func(t *testing.T) { logger, logs := logging.NewObservedTestLogger(t) diff --git a/robot/server/server.go b/robot/server/server.go index 997c34f7ef6..411c2da6d9b 100644 --- a/robot/server/server.go +++ b/robot/server/server.go @@ -6,7 +6,6 @@ package server import ( "bytes" "context" - "fmt" "strings" "time" @@ -525,7 +524,10 @@ func (s *Server) GetMachineStatus(ctx context.Context, _ *pb.GetMachineStatusReq case resource.NodeStateRemoving: pbResStatus.State = pb.ResourceStatus_STATE_REMOVING case resource.NodeStateUnhealthy: - panic(fmt.Sprintf("unexpected resource.NodeState: %#v", resStatus.State)) + pbResStatus.State = pb.ResourceStatus_STATE_UNHEALTHY + if resStatus.Error != nil { + pbResStatus.Error = resStatus.Error.Error() + } } result.Resources = append(result.Resources, pbResStatus) diff --git a/robot/server/server_test.go b/robot/server/server_test.go index 565778fe4fd..6e464a2f97d 100644 --- a/robot/server/server_test.go +++ b/robot/server/server_test.go @@ -177,6 +177,30 @@ func TestServer(t *testing.T) { }, 2, }, + { + "unhealthy status", + robot.MachineStatus{ + Config: config.Revision{Revision: "rev1"}, + Resources: []resource.Status{ + { + Name: arm.Named("brokenArm"), + Revision: "rev1", + State: resource.NodeStateUnhealthy, + Error: errors.New("bad configuration"), + }, + }, + }, + &pb.ConfigStatus{Revision: "rev1"}, + []*pb.ResourceStatus{ + { + Name: protoutils.ResourceNameToProto(arm.Named("brokenArm")), + State: pb.ResourceStatus_STATE_UNHEALTHY, + Revision: "rev1", + Error: "bad configuration", + }, + }, + 0, + }, } { logger, logs := logging.NewObservedTestLogger(t) injectRobot := &inject.Robot{} From 828f8b9758817e06e5cf14d1d4d638bbed708842 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Sat, 3 Aug 2024 00:03:09 -0400 Subject: [PATCH 13/27] RSDK-6875: fix test --- robot/client/client.go | 6 +++--- robot/client/client_test.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/robot/client/client.go b/robot/client/client.go index d38cc0d2943..e9848af5e13 100644 --- a/robot/client/client.go +++ b/robot/client/client.go @@ -3,6 +3,7 @@ package client import ( "context" + "errors" "flag" "fmt" "io" @@ -15,7 +16,6 @@ import ( grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" "github.com/jhump/protoreflect/desc" "github.com/jhump/protoreflect/grpcreflect" - "github.com/pkg/errors" "go.uber.org/multierr" "go.uber.org/zap" "go.uber.org/zap/zapcore" @@ -131,7 +131,7 @@ func isClosedPipeError(err error) bool { } func (rc *RobotClient) notConnectedToRemoteError() error { - return errors.Errorf("not connected to remote robot at %s", rc.address) + return fmt.Errorf("not connected to remote robot at %s", rc.address) } func (rc *RobotClient) handleUnaryDisconnect( @@ -624,7 +624,7 @@ func (rc *RobotClient) resources(ctx context.Context) ([]resource.Name, []resour } svcDesc, ok := symDesc.(*desc.ServiceDescriptor) if !ok { - return nil, nil, errors.Errorf("expected descriptor to be service descriptor but got %T", symDesc) + return nil, nil, fmt.Errorf("expected descriptor to be service descriptor but got %T", symDesc) } resTypes = append(resTypes, resource.RPCAPI{ API: rprotoutils.ResourceNameFromProto(resAPI.Subtype).API, diff --git a/robot/client/client_test.go b/robot/client/client_test.go index ca8b09628c4..99f025b2be4 100644 --- a/robot/client/client_test.go +++ b/robot/client/client_test.go @@ -3,6 +3,7 @@ package client import ( "bytes" "context" + "errors" "fmt" "image" "image/png" @@ -19,7 +20,6 @@ import ( "github.com/golang/geo/r3" "github.com/google/uuid" "github.com/jhump/protoreflect/grpcreflect" - "github.com/pkg/errors" "go.uber.org/zap/zapcore" commonpb "go.viam.com/api/common/v1" armpb "go.viam.com/api/component/arm/v1" @@ -1325,12 +1325,12 @@ func TestClientDiscovery(t *testing.T) { func ensurePartsAreEqual(part, otherPart *referenceframe.FrameSystemPart) error { if part.FrameConfig.Name() != otherPart.FrameConfig.Name() { - return errors.Errorf("part had name %s while other part had name %s", part.FrameConfig.Name(), otherPart.FrameConfig.Name()) + return fmt.Errorf("part had name %s while other part had name %s", part.FrameConfig.Name(), otherPart.FrameConfig.Name()) } frameConfig := part.FrameConfig otherFrameConfig := otherPart.FrameConfig if frameConfig.Parent() != otherFrameConfig.Parent() { - return errors.Errorf("part had parent %s while other part had parent %s", frameConfig.Parent(), otherFrameConfig.Parent()) + return fmt.Errorf("part had parent %s while other part had parent %s", frameConfig.Parent(), otherFrameConfig.Parent()) } if !spatialmath.R3VectorAlmostEqual(frameConfig.Pose().Point(), otherFrameConfig.Pose().Point(), 1e-8) { return errors.New("translations of parts not equal") From 02dee8e73c0f968baf56c358cad694104479cd20 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Sat, 3 Aug 2024 13:21:53 -0400 Subject: [PATCH 14/27] fix local robot tests --- robot/impl/local_robot_test.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/robot/impl/local_robot_test.go b/robot/impl/local_robot_test.go index 8fc92758229..168942f526d 100644 --- a/robot/impl/local_robot_test.go +++ b/robot/impl/local_robot_test.go @@ -4,6 +4,7 @@ import ( "context" "crypto/tls" "crypto/x509" + "errors" "fmt" "math" "net" @@ -16,7 +17,7 @@ import ( "github.com/go-viper/mapstructure/v2" "github.com/golang/geo/r3" - "github.com/pkg/errors" + legacyerrors "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "go.uber.org/zap" // registers all components. @@ -986,7 +987,7 @@ func TestStatus(t *testing.T) { test.That(t, err, test.ShouldBeNil) _, err = r.Status(context.Background(), []resource.Name{fail1}) - test.That(t, err, test.ShouldBeError, errors.Wrapf(errFailed, "failed to get status from %q", fail1)) + test.That(t, err, test.ShouldBeError, legacyerrors.Wrapf(errFailed, "failed to get status from %q", fail1)) }) t.Run("many status", func(t *testing.T) { @@ -1024,7 +1025,7 @@ func TestStatus(t *testing.T) { test.That(t, resp[1].Status, test.ShouldResemble, expected[resp[1].Name]) _, err = r.Status(context.Background(), resourceNames) - test.That(t, err, test.ShouldBeError, errors.Wrapf(errFailed, "failed to get status from %q", fail1)) + test.That(t, err, test.ShouldBeError, legacyerrors.Wrapf(errFailed, "failed to get status from %q", fail1)) }) t.Run("get all status", func(t *testing.T) { @@ -2639,13 +2640,13 @@ func TestDependentAndOrphanedResources(t *testing.T) { if rName.API == gizmoapi.API { gizmo, ok := res.(gizmoapi.Gizmo) if !ok { - return nil, errors.Errorf("resource %s is not a gizmo", rName.Name) + return nil, fmt.Errorf("resource %s is not a gizmo", rName.Name) } newDoodad.gizmo = gizmo } } if newDoodad.gizmo == nil { - return nil, errors.Errorf("doodad %s must depend on a gizmo", conf.Name) + return nil, fmt.Errorf("doodad %s must depend on a gizmo", conf.Name) } return newDoodad, nil }, @@ -3555,9 +3556,11 @@ type mockConfig struct { Fail bool `json:"fail"` } +var errMockValidation = errors.New("whoops") + func (cfg *mockConfig) Validate(path string) ([]string, error) { if cfg.Fail { - return nil, errors.New("whoops") + return nil, errMockValidation } return []string{}, nil } @@ -3669,6 +3672,8 @@ func TestMachineStatus(t *testing.T) { t.Run("reconfigure", func(t *testing.T) { lr := setupLocalRobot(t, ctx, &config.Config{Revision: rev1}, logger) + expectedConfigError := fmt.Errorf("resource config validation error: %w", errMockValidation) + // Add a fake resource to the robot. rev2 := "rev2" builtinRev = rev2 @@ -3726,6 +3731,7 @@ func TestMachineStatus(t *testing.T) { Name: mockNamed("m"), State: resource.NodeStateUnhealthy, Revision: rev2, + Error: errors.Join(expectedConfigError), }, }, ) From b1b30db63e1988c77262e5e6231216c249329a91 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Wed, 7 Aug 2024 22:55:32 -0400 Subject: [PATCH 15/27] keep short lock --- resource/graph_node.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index 744b3ec643c..09c3180978e 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -277,10 +277,9 @@ func (w *GraphNode) MarkedForRemoval() bool { // The additional `args` should come in key/value pairs for structured logging. func (w *GraphNode) LogAndSetLastError(err error, args ...any) { w.mu.Lock() - defer w.mu.Unlock() - w.lastErr = errors.Join(w.lastErr, err) w.transitionTo(NodeStateUnhealthy) + w.mu.Unlock() if w.logger != nil { w.logger.Errorw(err.Error(), args...) From f78133e2ac69ab64948ce7e57aa8fd49b79190ee Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Wed, 7 Aug 2024 23:16:42 -0400 Subject: [PATCH 16/27] update expected transitions --- resource/graph_node.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index 09c3180978e..7c8be5b1108 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -485,7 +485,7 @@ func (w *GraphNode) canTransitionTo(state NodeState) bool { case NodeStateUnhealthy: //nolint switch state { - case NodeStateConfiguring, NodeStateReady, NodeStateRemoving, NodeStateUnhealthy: + case NodeStateReady, NodeStateRemoving, NodeStateUnhealthy: return true } } @@ -496,11 +496,6 @@ func (w *GraphNode) canTransitionTo(state NodeState) bool { // if the state transition is not expected. This method is not thread-safe and must be // called while holding a write lock on `mu` if accessed concurrently. func (w *GraphNode) transitionTo(state NodeState) { - if w.state == state && w.logger != nil { - w.logger.Debugw("resource state self-transition", "state", w.state.String()) - return - } - if !w.canTransitionTo(state) && w.logger != nil { w.logger.Warnw("unexpected resource state transition", "from", w.state.String(), "to", state.String()) } From f51341eef497aaf511a1720fb6a7df6bce745016 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Wed, 7 Aug 2024 23:39:25 -0400 Subject: [PATCH 17/27] revert self-transition guard --- resource/graph_node.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/resource/graph_node.go b/resource/graph_node.go index 7c8be5b1108..e2b3fe5c297 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -496,6 +496,11 @@ func (w *GraphNode) canTransitionTo(state NodeState) bool { // if the state transition is not expected. This method is not thread-safe and must be // called while holding a write lock on `mu` if accessed concurrently. func (w *GraphNode) transitionTo(state NodeState) { + if w.state == state && w.logger != nil { + w.logger.Debugw("resource state self-transition", "state", w.state.String()) + return + } + if !w.canTransitionTo(state) && w.logger != nil { w.logger.Warnw("unexpected resource state transition", "from", w.state.String(), "to", state.String()) } From a9fb365abbd9b428e40f1536e423049d4478781c Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 10:21:27 -0400 Subject: [PATCH 18/27] CR@dgottlieb: docs --- resource/graph_node.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/resource/graph_node.go b/resource/graph_node.go index e2b3fe5c297..232e08783f1 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -300,6 +300,9 @@ func (w *GraphNode) Config() Config { func (w *GraphNode) NeedsReconfigure() bool { w.mu.RLock() defer w.mu.RUnlock() + + // A resource can only become unhealthy during (re)configuration, so we can + // assume that an unhealthy node always need to be reconfigured. return w.state == NodeStateConfiguring || w.state == NodeStateUnhealthy } From 443048bcc79a9f8f1dc42937f4ce0e40ff9ba6bb Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 10:39:30 -0400 Subject: [PATCH 19/27] CR@dgottlieb/@benjirewis: docs on valid transitions --- resource/graph_node.go | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index 232e08783f1..896d85b731f 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -488,9 +488,26 @@ func (w *GraphNode) canTransitionTo(state NodeState) bool { case NodeStateUnhealthy: //nolint switch state { - case NodeStateReady, NodeStateRemoving, NodeStateUnhealthy: + case NodeStateRemoving, NodeStateUnhealthy: + return true + case NodeStateReady: + // TODO(NEEDS TICKET): ideally, a node should "recover" by the following transition + // steps: "unhealthy" -> "configuring" -> "ready". + // + // However, once a node becomes "unhealthy" it is filtered out from most + // reconfiguration operations, and is not available to be transitioned to + // "configuring" when we want it to be. + // + // We eventually want to change the reconfiguration system to show unhealthy + // nodes instead of filtering them out at each step of the process. + // + // In the meantime, we allow nodes to "recover" by transitioning directly + // from "unhealthy" -> "ready". + // + // See this discussion for more details: https://github.com/viamrobotics/rdk/pull/4257#discussion_r1712173743 return true } + } return false } From dc565ad029a3d7b66ac0692626a092d7d9544a3e Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 10:45:06 -0400 Subject: [PATCH 20/27] CR@dgottlieb: fix CI error to include generate-go command --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f6087fec7d8..3802fe45bc5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -64,9 +64,9 @@ jobs: GEN_DIFF=$(git status -s) if [ -n "$GEN_DIFF" ]; then - echo '"make build lint" resulted in the following untracked changes:' 1>&2 + echo '"make build-go lint-go generate-go" resulted in the following untracked changes:' 1>&2 git diff - echo '"make build lint" resulted in changes not in git' 1>&2 + echo '"make build-go lint-go generate-go" resulted in changes not in git' 1>&2 git status exit 1 fi @@ -137,7 +137,7 @@ jobs: GEN_DIFF=$(git status -s) if [ -n "$GEN_DIFF" ]; then - echo '"make build lint" resulted in changes not in git' 1>&2 + echo '"make build-go lint-go generate-go" resulted in changes not in git' 1>&2 git status exit 1 fi From f7527fb9356ac5a62b1f7ec02b6936f7811f59ff Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 11:15:29 -0400 Subject: [PATCH 21/27] remove "github.com/pkg/errors" package from test --- robot/impl/local_robot_test.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/robot/impl/local_robot_test.go b/robot/impl/local_robot_test.go index 168942f526d..ea1b812674d 100644 --- a/robot/impl/local_robot_test.go +++ b/robot/impl/local_robot_test.go @@ -17,9 +17,9 @@ import ( "github.com/go-viper/mapstructure/v2" "github.com/golang/geo/r3" - legacyerrors "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "go.uber.org/zap" + // registers all components. commonpb "go.viam.com/api/common/v1" armpb "go.viam.com/api/component/arm/v1" @@ -987,7 +987,8 @@ func TestStatus(t *testing.T) { test.That(t, err, test.ShouldBeNil) _, err = r.Status(context.Background(), []resource.Name{fail1}) - test.That(t, err, test.ShouldBeError, legacyerrors.Wrapf(errFailed, "failed to get status from %q", fail1)) + expectedErr := fmt.Errorf("failed to get status from %q: %w", fail1, errFailed) + test.That(t, err.Error(), test.ShouldEqual, expectedErr.Error()) }) t.Run("many status", func(t *testing.T) { @@ -1025,7 +1026,8 @@ func TestStatus(t *testing.T) { test.That(t, resp[1].Status, test.ShouldResemble, expected[resp[1].Name]) _, err = r.Status(context.Background(), resourceNames) - test.That(t, err, test.ShouldBeError, legacyerrors.Wrapf(errFailed, "failed to get status from %q", fail1)) + expectedErr := fmt.Errorf("failed to get status from %q: %w", fail1, errFailed) + test.That(t, err.Error(), test.ShouldEqual, expectedErr.Error()) }) t.Run("get all status", func(t *testing.T) { From 7f320f006475abeea527bd947321f95c5841e282 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 11:18:56 -0400 Subject: [PATCH 22/27] TODO --- robot/impl/local_robot_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/robot/impl/local_robot_test.go b/robot/impl/local_robot_test.go index ea1b812674d..53cbb88d374 100644 --- a/robot/impl/local_robot_test.go +++ b/robot/impl/local_robot_test.go @@ -987,6 +987,8 @@ func TestStatus(t *testing.T) { test.That(t, err, test.ShouldBeNil) _, err = r.Status(context.Background(), []resource.Name{fail1}) + // TODO(RSDK-6875): compare errors directly instead by string after + // `github.com/pkg/errors` is removed entirely. expectedErr := fmt.Errorf("failed to get status from %q: %w", fail1, errFailed) test.That(t, err.Error(), test.ShouldEqual, expectedErr.Error()) }) @@ -1026,6 +1028,8 @@ func TestStatus(t *testing.T) { test.That(t, resp[1].Status, test.ShouldResemble, expected[resp[1].Name]) _, err = r.Status(context.Background(), resourceNames) + // TODO(RSDK-6875): compare errors directly instead by string after + // `github.com/pkg/errors` is removed entirely. expectedErr := fmt.Errorf("failed to get status from %q: %w", fail1, errFailed) test.That(t, err.Error(), test.ShouldEqual, expectedErr.Error()) }) From ff43ecc0606bdcfca23fa00d8f2b7e89540b51b8 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 11:53:39 -0400 Subject: [PATCH 23/27] CR@dgottlieb: warn if invariants are violated --- resource/graph_node.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/resource/graph_node.go b/resource/graph_node.go index 896d85b731f..994dcac8256 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -548,6 +548,22 @@ func (w *GraphNode) resourceStatus() Status { var err error if w.state == NodeStateUnhealthy { err = w.lastErr + if err == nil && w.logger != nil { + // an unhealthy node should always have a non-nil error - log a warning if this + // invariant is violated. + w.logger.Warnw("an unhealthy node doesn't have an error", "resource", resName) + } + } else if w.lastErr != nil && w.logger != nil { + // a recovered node should not have an error - log a warning if this invariant is + // violated. + var logf = w.logger.Warnf + if w.state == NodeStateRemoving { + // a previously-unhealthy node can be marked for removal but still retain the + // error - this is not surprising so log at the debug level. + logf = w.logger.Debugf + } + logf("a node has an error but is not unhealthy", + "resource", resName, "state", w.state.String(), "error", w.lastErr) } return Status{ From b228237f6dc05811834d29ee517bb9116c1118e0 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 12:12:15 -0400 Subject: [PATCH 24/27] always return error in status unless node is ready --- resource/graph_node.go | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index 994dcac8256..8494577fc35 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -507,7 +507,6 @@ func (w *GraphNode) canTransitionTo(state NodeState) bool { // See this discussion for more details: https://github.com/viamrobotics/rdk/pull/4257#discussion_r1712173743 return true } - } return false } @@ -537,6 +536,15 @@ func (w *GraphNode) ResourceStatus() Status { return w.resourceStatus() } +func (w *GraphNode) getLoggerOrGlobal() logging.Logger { + if w.logger == nil { + // This node has not yet been configured with a logger - use the global logger as + // a fall-back. + return logging.Global() + } + return w.logger +} + func (w *GraphNode) resourceStatus() Status { var resName Name if w.current == nil { @@ -545,25 +553,17 @@ func (w *GraphNode) resourceStatus() Status { resName = w.current.Name() } - var err error - if w.state == NodeStateUnhealthy { - err = w.lastErr - if err == nil && w.logger != nil { - // an unhealthy node should always have a non-nil error - log a warning if this - // invariant is violated. - w.logger.Warnw("an unhealthy node doesn't have an error", "resource", resName) - } - } else if w.lastErr != nil && w.logger != nil { - // a recovered node should not have an error - log a warning if this invariant is - // violated. - var logf = w.logger.Warnf - if w.state == NodeStateRemoving { - // a previously-unhealthy node can be marked for removal but still retain the - // error - this is not surprising so log at the debug level. - logf = w.logger.Debugf - } - logf("a node has an error but is not unhealthy", - "resource", resName, "state", w.state.String(), "error", w.lastErr) + err := w.lastErr + logger := w.getLoggerOrGlobal() + + // check invariants between state and error + switch { + case w.state == NodeStateUnhealthy && w.lastErr == nil: + logger.Warnw("an unhealthy node doesn't have an error", "resource", resName) + case w.state == NodeStateUnhealthy && w.lastErr != nil: + logger.Warnw("a ready node still has an error", "resource", resName, "error", err) + // do not return leftover error in status if the node is ready + err = nil } return Status{ From 46345a2ce957a9afef5e8eeccef20add27132488 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 12:14:04 -0400 Subject: [PATCH 25/27] lint --- robot/impl/local_robot_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/robot/impl/local_robot_test.go b/robot/impl/local_robot_test.go index 53cbb88d374..29ab1c10a69 100644 --- a/robot/impl/local_robot_test.go +++ b/robot/impl/local_robot_test.go @@ -19,7 +19,6 @@ import ( "github.com/golang/geo/r3" "go.mongodb.org/mongo-driver/bson/primitive" "go.uber.org/zap" - // registers all components. commonpb "go.viam.com/api/common/v1" armpb "go.viam.com/api/component/arm/v1" From 438b33342d51f25db97120e20e20389ccdfbb23e Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 12:33:44 -0400 Subject: [PATCH 26/27] CR@dgottlieb: document stringer usage --- resource/graph_node.go | 3 +++ tools/tools.go | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index 8494577fc35..7a8adbaf5b2 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -11,6 +11,9 @@ import ( ) //go:generate stringer -type NodeState -trimprefix NodeState +// +// The directive above configures `go generate ./...` to automatically generate a +// `String` method for the NodeState type. // NodeState captures the configuration lifecycle state of a resource node. type NodeState uint8 diff --git a/tools/tools.go b/tools/tools.go index bdfea358e73..a2a40d1c478 100644 --- a/tools/tools.go +++ b/tools/tools.go @@ -13,7 +13,7 @@ import ( _ "github.com/golangci/golangci-lint/cmd/golangci-lint" _ "github.com/rhysd/actionlint" _ "golang.org/x/mobile/cmd/gomobile" - _ "golang.org/x/tools/cmd/stringer" + _ "golang.org/x/tools/cmd/stringer" // generates `String` methods for enums _ "gotest.tools/gotestsum" // only needed for proto building in examples/customresources/apis/proto From 4f9e5bf3517c27acacd6f0a39c9d4dffb020a6b5 Mon Sep 17 00:00:00 2001 From: Maxim Pertsov Date: Tue, 20 Aug 2024 13:30:29 -0400 Subject: [PATCH 27/27] fixup --- resource/graph_node.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource/graph_node.go b/resource/graph_node.go index 7a8adbaf5b2..501a1411767 100644 --- a/resource/graph_node.go +++ b/resource/graph_node.go @@ -563,7 +563,7 @@ func (w *GraphNode) resourceStatus() Status { switch { case w.state == NodeStateUnhealthy && w.lastErr == nil: logger.Warnw("an unhealthy node doesn't have an error", "resource", resName) - case w.state == NodeStateUnhealthy && w.lastErr != nil: + case w.state == NodeStateReady && w.lastErr != nil: logger.Warnw("a ready node still has an error", "resource", resName, "error", err) // do not return leftover error in status if the node is ready err = nil