From 4b9be118a773110c6006979fca9c3c9bf4e1b5f4 Mon Sep 17 00:00:00 2001 From: chenjiankun Date: Thu, 3 Mar 2022 15:48:25 +0800 Subject: [PATCH] docker:fix container status not consistent with its shim process status 1. fix containerd-shim residual when containerd is killed during container start If containerd is killed after the shim and the container init process have started, the new containerd process will not clean them up during load-task. Both t.Start and t.Delete in docker then fail because docker cannot connect to containerd. Meanwhile, docker has not yet received the container start event, so it does not set the container status to running. As a result, the shim and the container init process are left behind while docker reports the container status as Created. Even after the container is deleted, the shim and init process still exist. So we run runc delete --force if t.Start fails, which does not need to send a signal through containerd to kill the container process. 2. fix shim killed but container status is running In a scenario similar to 1, the shim and the container init process have started, and the start event has been sent to dockerd. If containerd is killed and a new containerd process is started before t.Delete, the shim is killed but the container init process keeps running, so dockerd never receives the process exit event. dockerd therefore shows the container as running although the shim has actually been killed. So we run runc delete --force if t.Start fails, in order to kill the container init process. 
--- VERSION-openeuler | 2 +- docker-engine-openeuler.spec | 6 ++ ...iner-status-not-consistent-with-its-.patch | 88 +++++++++++++++++++ series.conf | 1 + 4 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 patch/0196-docker-fix-container-status-not-consistent-with-its-.patch diff --git a/VERSION-openeuler b/VERSION-openeuler index c75fc47..5f292c2 100644 --- a/VERSION-openeuler +++ b/VERSION-openeuler @@ -1 +1 @@ -18.09.0.207 +18.09.0.208 diff --git a/docker-engine-openeuler.spec b/docker-engine-openeuler.spec index cb95afc..1a8ffc9 100644 --- a/docker-engine-openeuler.spec +++ b/docker-engine-openeuler.spec @@ -201,6 +201,12 @@ fi %endif %changelog +* Thu Mar 3 2022 chenjiankun - 18.09.0-208 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:fix container status not consistent with its shim process status + * Thu Mar 3 2022 chenjiankun - 18.09.0-207 - Type:bugfix - ID:NA diff --git a/patch/0196-docker-fix-container-status-not-consistent-with-its-.patch b/patch/0196-docker-fix-container-status-not-consistent-with-its-.patch new file mode 100644 index 0000000..3b6658c --- /dev/null +++ b/patch/0196-docker-fix-container-status-not-consistent-with-its-.patch @@ -0,0 +1,88 @@ +From 8034f96d1500dac8af17449b9dba01b07b956a04 Mon Sep 17 00:00:00 2001 +From: xiadanni +Date: Tue, 2 Mar 2021 09:31:44 +0800 +Subject: [PATCH] docker: fix container status not consistent with its shim + process status + +1. fix containerd-shim residual when kill containerd during start container +If containerd is killed after shim and container init process started, +new containerd process will not clean them during load-task. +But both of t.Start and t.Delete in docker failed because it cannot +connect to containerd. In the meanwhile, docker have not received container +start event yet, so it will not set container status to running. +All of above caused shim and container init process residual but +container status from docker is Created. 
Even after container is +deleted, shim and init process still exist. +So we add runc delete --force if t.Start failed, which do not need to +send signal through containerd to kill container process. + +2. fix shim killed but container status is running +In the similar scene with 1, shim and container init process started, +and start event is sent to dockerd. But containerd is killed and new +containerd process is started before t.Delete, shim will be killed but +container init process is still working, dockerd will not receive +process exit event. So dockerd shows container is running but actually +shim is killed. +So we add runc delete --force if t.Start failed to kill container init +process. + +Signed-off-by: xiadanni +--- + components/engine/libcontainerd/client_daemon.go | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/components/engine/libcontainerd/client_daemon.go b/components/engine/libcontainerd/client_daemon.go +index 502796b..9c65e54 100755 +--- a/components/engine/libcontainerd/client_daemon.go ++++ b/components/engine/libcontainerd/client_daemon.go +@@ -8,6 +8,7 @@ import ( + "fmt" + "io" + "os" ++ "os/exec" + "path/filepath" + "reflect" + "runtime" +@@ -317,10 +318,9 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin + close(stdinCloseSync) + + if err := t.Start(ctx); err != nil { +- if _, err := t.Delete(ctx); err != nil { +- c.logger.WithError(err).WithField("container", id). 
+- Error("failed to delete task after fail start") +- } ++ exec.Command("runc", "--root", "/var/run/docker/runtime-runc/moby", "delete", "--force", id).Run() ++ _, errD := t.Delete(ctx) ++ logrus.Warnf("container %v start failed, delete task, delete err: %v", id, errD) + ctr.setTask(nil) + return -1, wrapError(err) + } +@@ -916,10 +916,7 @@ func (c *client) processEventStream(ctx context.Context, ns string) { + c.logger.WithField("container", ei.ContainerID).Warn("unknown container") + if et == EventExit && ei.ProcessID == ei.ContainerID && c.backend.IsContainerRunning(ei.ContainerID) { + c.logger.WithField("container", ei.ContainerID).Warn("handle exit event force ...") +- c.eventQ.append(ei.ContainerID, func() { +- c.logger.WithField("container", ei.ContainerID).Warnf("handle exit event force: error=%v", +- c.backend.ProcessEvent(ei.ContainerID, et, ei)) +- }) ++ c.processOrphanEvent(ctr, et, ei) + } + continue + } +@@ -935,6 +932,13 @@ func (c *client) processEventStream(ctx context.Context, ns string) { + } + } + ++func (c *client) processOrphanEvent(ctr *container, et EventType, ei EventInfo) { ++ c.eventQ.append(ei.ContainerID, func() { ++ c.logger.WithField("container", ei.ContainerID).Warnf("handle exit event force: error=%v", ++ c.backend.ProcessEvent(ei.ContainerID, et, ei)) ++ }) ++} ++ + func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) { + writer, err := c.client.ContentStore().Writer(ctx, content.WithRef(ref)) + if err != nil { +-- +1.8.3.1 + diff --git a/series.conf b/series.conf index 5d5a287..be9247f 100644 --- a/series.conf +++ b/series.conf @@ -189,4 +189,5 @@ patch/0192-docker-update-seccomp-whitelist-to-Linux-5.10-syscal.patch patch/0193-docker-fix-images-filter-when-use-multi-reference.patch patch/0194-docker-fix-docker-rmi-stucking.patch patch/0195-docker-fix-network-sandbox-not-cleaned-up-on-failure.patch +patch/0196-docker-fix-container-status-not-consistent-with-its-.patch 
#end -- Gitee