From 8d0050a372338681a6d3d55d59a2215d47589ed1 Mon Sep 17 00:00:00 2001
From: Valentin Rothberg <vrothberg@redhat.com>
Date: Thu, 4 May 2023 11:00:17 +0200
Subject: [PATCH] container stop: handle race

There is an inherent race when stopping/killing a container with other
processes attempting to do the same and also with the container having
exited in the meantime.  In those cases, the OCI runtime may fail to
kill the container as it has already exited.

Handle those races by first checking if the container state has changed
before returning the error.

[NO NEW TESTS NEEDED] - as it's a hard to test race.

Fixes: #18452
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
---
 libpod/container_internal.go | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 9be8b9397e..cf4fa1e0cb 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1369,12 +1369,6 @@ func (c *Container) stop(timeout uint) error {
 		}
 	}
 
-	// We have to check stopErr *after* we lock again - otherwise, we have a
-	// change of panicking on a double-unlock. Ref: GH Issue 9615
-	if stopErr != nil {
-		return stopErr
-	}
-
 	// Since we're now subject to a race condition with other processes who
 	// may have altered the state (and other data), let's check if the
 	// state has changed.  If so, we should return immediately and leave
@@ -1387,6 +1381,17 @@ func (c *Container) stop(timeout uint) error {
 		return nil
 	}
 
+	// We have to check stopErr *after* we lock again - otherwise, we have a
+	// chance of panicking on a double-unlock (see #9615).
+	//
+	// If the state has changed, stopErr is ignored (see #18452) since we
+	// should optimistically assume that another process has already
+	// stopped/killed the container or that the container has exited in the
+	// meantime.
+	if stopErr != nil {
+		return stopErr
+	}
+
 	c.newContainerEvent(events.Stop)
 	return c.waitForConmonToExitAndSave()
 }