From ccb607ac3585962bd05ae939db1a1a97b0a37a7a Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Sat, 9 Nov 2019 10:26:39 -0800 Subject: [PATCH] [docker-syncd]: Restart SwSS, syncd and dependent services if a critical process in syncd container exits unexpectedly (#3534) Add the same mechanism I developed for the SwSS service in #2845 to the syncd service. However, in order to cause the SwSS service to also exit and restart in this situation, I developed a docker-wait-any program which the SwSS service uses to wait for either the swss or syncd containers to exit. --- .../build_templates/sonic_debian_extension.j2 | 3 + files/image_config/misc/docker-wait-any | 62 +++++++++++++++++++ files/scripts/swss.sh | 17 ++++- platform/barefoot/docker-syncd-bfn-rpc.mk | 1 + .../barefoot/docker-syncd-bfn/Dockerfile.j2 | 2 + .../docker-syncd-bfn/critical_processes | 1 + platform/broadcom/docker-syncd-brcm-rpc.mk | 2 +- .../broadcom/docker-syncd-brcm/Dockerfile.j2 | 2 + .../docker-syncd-brcm/critical_processes | 2 + .../docker-syncd-brcm/supervisord.conf | 8 ++- platform/cavium/docker-syncd-cavm-rpc.mk | 1 + platform/cavium/docker-syncd-cavm.mk | 1 + .../cavium/docker-syncd-cavm/Dockerfile.j2 | 2 + .../docker-syncd-cavm/critical_processes | 1 + .../cavium/docker-syncd-cavm/supervisord.conf | 8 ++- platform/centec/docker-syncd-centec-rpc.mk | 1 + platform/centec/docker-syncd-centec.mk | 1 + .../centec/docker-syncd-centec/Dockerfile.j2 | 2 + .../docker-syncd-centec/critical_processes | 1 + .../docker-syncd-centec/supervisord.conf | 8 ++- .../marvell-arm64/docker-syncd-mrvl-rpc.mk | 1 + .../docker-syncd-mrvl/Dockerfile.j2 | 2 + .../docker-syncd-mrvl/critical_processes | 1 + .../marvell-armhf/docker-syncd-mrvl-rpc.mk | 1 + .../docker-syncd-mrvl/Dockerfile.j2 | 2 + .../docker-syncd-mrvl/critical_processes | 1 + platform/marvell/docker-syncd-mrvl-rpc.mk | 1 + .../marvell/docker-syncd-mrvl/Dockerfile.j2 | 2 + .../docker-syncd-mrvl/critical_processes | 1 + .../docker-syncd-mrvl/supervisord.conf | 8 ++- platform/mellanox/docker-syncd-mlnx-rpc.mk | 1 + .../mellanox/docker-syncd-mlnx/Dockerfile.j2 | 2 + .../docker-syncd-mlnx/critical_processes | 1 + .../docker-syncd-mlnx/supervisord.conf | 8 ++- .../nephos/docker-syncd-nephos/Dockerfile.j2 | 2 + .../docker-syncd-nephos/critical_processes | 2 + .../docker-syncd-nephos/supervisord.conf | 8 ++- platform/template/docker-syncd-base.mk | 2 + 38 files changed, 164 insertions(+), 8 deletions(-) create mode 100755 files/image_config/misc/docker-wait-any create mode 100644 platform/barefoot/docker-syncd-bfn/critical_processes create mode 100644 platform/broadcom/docker-syncd-brcm/critical_processes create mode 100644 platform/cavium/docker-syncd-cavm/critical_processes create mode 100644 platform/centec/docker-syncd-centec/critical_processes create mode 100644 platform/marvell-arm64/docker-syncd-mrvl/critical_processes create mode 100644 platform/marvell-armhf/docker-syncd-mrvl/critical_processes create mode 100644 platform/marvell/docker-syncd-mrvl/critical_processes create mode 100644 platform/mellanox/docker-syncd-mlnx/critical_processes create mode 100644 platform/nephos/docker-syncd-nephos/critical_processes diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index d81815284a34..13d5432e09ea 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -218,6 +218,9 @@ sudo cp $IMAGE_CONFIGS/hostname/hostname-config.service $FILESYSTEM_ROOT/etc/sy echo "hostname-config.service" | sudo tee -a $GENERATED_SERVICE_FILE sudo cp $IMAGE_CONFIGS/hostname/hostname-config.sh $FILESYSTEM_ROOT/usr/bin/ +# Copy miscellaneous scripts +sudo cp $IMAGE_CONFIGS/misc/docker-wait-any $FILESYSTEM_ROOT/usr/bin/ + # Copy updategraph script and service file j2 files/build_templates/updategraph.service.j2 | sudo tee $FILESYSTEM_ROOT/etc/systemd/system/updategraph.service sudo cp $IMAGE_CONFIGS/updategraph/updategraph $FILESYSTEM_ROOT/usr/bin/ diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any new file mode 100755 index 000000000000..3988a9fbdf57 --- /dev/null +++ b/files/image_config/misc/docker-wait-any @@ -0,0 +1,62 @@ +#!/usr/bin/env python + +""" + docker-wait-any + This script takes one or more Docker container names as arguments, + and it will block indefinitely while all of the specified containers + are running. If any of the specified containers stop, the script will + exit. + This script was created because the 'docker wait' command is lacking + this functionality. It will block until ALL specified containers have + stopped running. Here, we spawn multiple threads and wait on one + container per thread. If any of the threads exit, the entire + application will exit. + NOTE: This script is written against docker-py version 1.6.0. Newer + versions of docker-py have a different API. +""" + +import sys +import threading +from docker import Client + +# Instantiate a global event to share among our threads +g_thread_exit_event = threading.Event() + + +def usage(): + print("Usage: {} [ ...]".format(sys.argv[0])) + sys.exit(1) + + +def wait_for_container(docker_client, container_name): + docker_client.wait(container_name) + + print("No longer waiting on container '{}'".format(container_name)) + + # Signal the main thread to exit + g_thread_exit_event.set() + + +def main(): + thread_list = [] + + docker_client = Client(base_url='unix://var/run/docker.sock') + + # Ensure we were passed at least one argument + if len(sys.argv) < 2: + usage() + + container_names = sys.argv[1:] + + for container_name in container_names: + t = threading.Thread(target=wait_for_container, args=[docker_client, container_name]) + t.daemon = True + t.start() + thread_list.append(t) + + # Wait until we receive an event signifying one of the containers has stopped + g_thread_exit_event.wait() + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index 5b2f582ef4c4..65eaf08148a8 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -131,7 +131,22 @@ start() { wait() { start_peer_and_dependent_services - /usr/bin/${SERVICE}.sh wait + + # Allow some time for peer container to start + # NOTE: This assumes Docker containers share the same names as their + # corresponding services + for SECS in {1..60}; do + RUNNING=$(docker inspect -f '{{.State.Running}}' ${PEER}) + if [[ x"$RUNNING" == x"true" ]]; then + break + else + sleep 1 + fi + done + + # NOTE: This assumes Docker containers share the same names as their + # corresponding services + /usr/bin/docker-wait-any ${SERVICE} ${PEER} } stop() { diff --git a/platform/barefoot/docker-syncd-bfn-rpc.mk b/platform/barefoot/docker-syncd-bfn-rpc.mk index 4fbe4eb1721c..d9cb0b5d6172 100644 --- a/platform/barefoot/docker-syncd-bfn-rpc.mk +++ b/platform/barefoot/docker-syncd-bfn-rpc.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_BFN_RPC = docker-syncd-bfn-rpc.gz $(DOCKER_SYNCD_BFN_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-bfn-rpc $(DOCKER_SYNCD_BFN_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) +$(DOCKER_SYNCD_BFN_RPC)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_BFN_RPC)_DEPENDS += $(SYNCD_RPC_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/barefoot/docker-syncd-bfn/Dockerfile.j2 b/platform/barefoot/docker-syncd-bfn/Dockerfile.j2 index 73a12b8a05cc..c12c36b1260b 100755 --- a/platform/barefoot/docker-syncd-bfn/Dockerfile.j2 +++ b/platform/barefoot/docker-syncd-bfn/Dockerfile.j2 @@ -29,6 +29,8 @@ debs/{{ deb }}{{' '}} COPY ["start.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] ## Clean up RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y diff --git a/platform/barefoot/docker-syncd-bfn/critical_processes b/platform/barefoot/docker-syncd-bfn/critical_processes new file mode 100644 index 000000000000..6082f242b872 --- /dev/null +++ b/platform/barefoot/docker-syncd-bfn/critical_processes @@ -0,0 +1 @@ +syncd diff --git a/platform/broadcom/docker-syncd-brcm-rpc.mk b/platform/broadcom/docker-syncd-brcm-rpc.mk index ed7fc1b5265e..80d6ea2d8147 100644 --- a/platform/broadcom/docker-syncd-brcm-rpc.mk +++ b/platform/broadcom/docker-syncd-brcm-rpc.mk @@ -9,7 +9,7 @@ $(DOCKER_SYNCD_BRCM_RPC)_DEPENDS += $(SYNCD_RPC_DBG) \ $(LIBSAIMETADATA_DBG) \ $(LIBSAIREDIS_DBG) endif -$(DOCKER_SYNCD_BRCM_RPC)_FILES += $(DSSERVE) $(BCMCMD) +$(DOCKER_SYNCD_BRCM_RPC)_FILES += $(DSSERVE) $(BCMCMD) $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) $(DOCKER_SYNCD_BRCM_RPC)_LOAD_DOCKERS += $(DOCKER_SYNCD_BASE) SONIC_DOCKER_IMAGES += $(DOCKER_SYNCD_BRCM_RPC) SONIC_STRETCH_DOCKERS += $(DOCKER_SYNCD_BRCM_RPC) diff --git a/platform/broadcom/docker-syncd-brcm/Dockerfile.j2 b/platform/broadcom/docker-syncd-brcm/Dockerfile.j2 index 328f698fdb6d..b20e353f842f 100755 --- a/platform/broadcom/docker-syncd-brcm/Dockerfile.j2 +++ b/platform/broadcom/docker-syncd-brcm/Dockerfile.j2 @@ -26,6 +26,8 @@ COPY ["files/dsserve", "files/bcmcmd", "start.sh", "bcmsh", "/usr/bin/"] RUN chmod +x /usr/bin/dsserve /usr/bin/bcmcmd COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] ## Clean up RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y diff --git a/platform/broadcom/docker-syncd-brcm/critical_processes b/platform/broadcom/docker-syncd-brcm/critical_processes new file mode 100644 index 000000000000..489668a89e08 --- /dev/null +++ b/platform/broadcom/docker-syncd-brcm/critical_processes @@ -0,0 +1,2 @@ +dsserve +syncd diff --git a/platform/broadcom/docker-syncd-brcm/supervisord.conf b/platform/broadcom/docker-syncd-brcm/supervisord.conf index a2e0743b1cf5..cd6712acbf22 100644 --- a/platform/broadcom/docker-syncd-brcm/supervisord.conf +++ b/platform/broadcom/docker-syncd-brcm/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 @@ -15,7 +21,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=false +autorestart=unexpected stdout_logfile=syslog stderr_logfile=syslog diff --git a/platform/cavium/docker-syncd-cavm-rpc.mk b/platform/cavium/docker-syncd-cavm-rpc.mk index cb1077c5a391..e57370fce5ca 100644 --- a/platform/cavium/docker-syncd-cavm-rpc.mk +++ b/platform/cavium/docker-syncd-cavm-rpc.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_CAVM_RPC = docker-syncd-cavm-rpc.gz $(DOCKER_SYNCD_CAVM_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-cavm-rpc $(DOCKER_SYNCD_CAVM_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) $(CAVM_LIBSAI) $(XP_TOOLS) $(REDIS_TOOLS) +$(DOCKER_SYNCD_CAVM_RPC)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_CAVM_RPC)_DEPENDS += $(SYNCD_RPC_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/cavium/docker-syncd-cavm.mk b/platform/cavium/docker-syncd-cavm.mk index 5cec8390593f..a136828dbff1 100644 --- a/platform/cavium/docker-syncd-cavm.mk +++ b/platform/cavium/docker-syncd-cavm.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_CAVM = docker-syncd-cavm.gz $(DOCKER_SYNCD_CAVM)_PATH = $(PLATFORM_PATH)/docker-syncd-cavm $(DOCKER_SYNCD_CAVM)_DEPENDS += $(SYNCD) $(CAVM_LIBSAI) $(XP_TOOLS) $(REDIS_TOOLS) +$(DOCKER_SYNCD_CAVM)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_CAVM)_DEPENDS += $(SYNCD_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/cavium/docker-syncd-cavm/Dockerfile.j2 b/platform/cavium/docker-syncd-cavm/Dockerfile.j2 index 800be7e1bb04..0e2bc4b6bbc7 100755 --- a/platform/cavium/docker-syncd-cavm/Dockerfile.j2 +++ b/platform/cavium/docker-syncd-cavm/Dockerfile.j2 @@ -23,6 +23,8 @@ debs/{{ deb }}{{' '}} COPY ["start.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] COPY ["profile.ini", "/etc/ssw/AS7512/"] diff --git a/platform/cavium/docker-syncd-cavm/critical_processes b/platform/cavium/docker-syncd-cavm/critical_processes new file mode 100644 index 000000000000..6082f242b872 --- /dev/null +++ b/platform/cavium/docker-syncd-cavm/critical_processes @@ -0,0 +1 @@ +syncd diff --git a/platform/cavium/docker-syncd-cavm/supervisord.conf b/platform/cavium/docker-syncd-cavm/supervisord.conf index 1af5d70a1d0c..c823ab5680ef 100644 --- a/platform/cavium/docker-syncd-cavm/supervisord.conf +++ b/platform/cavium/docker-syncd-cavm/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 @@ -15,7 +21,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=false +autorestart=unexpected stdout_logfile=syslog stderr_logfile=syslog diff --git a/platform/centec/docker-syncd-centec-rpc.mk b/platform/centec/docker-syncd-centec-rpc.mk index 744f60a71ff1..71c8ef7753c1 100644 --- a/platform/centec/docker-syncd-centec-rpc.mk +++ b/platform/centec/docker-syncd-centec-rpc.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_CENTEC_RPC = docker-syncd-centec-rpc.gz $(DOCKER_SYNCD_CENTEC_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-centec-rpc $(DOCKER_SYNCD_CENTEC_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) +$(DOCKER_SYNCD_CENTEC_RPC)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_CENTEC_RPC)_DEPENDS += $(SYNCD_RPC_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/centec/docker-syncd-centec.mk b/platform/centec/docker-syncd-centec.mk index 7447dd54717b..360690731a58 100644 --- a/platform/centec/docker-syncd-centec.mk +++ b/platform/centec/docker-syncd-centec.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_CENTEC = docker-syncd-centec.gz $(DOCKER_SYNCD_CENTEC)_PATH = $(PLATFORM_PATH)/docker-syncd-centec $(DOCKER_SYNCD_CENTEC)_DEPENDS += $(SYNCD) +$(DOCKER_SYNCD_CENTEC)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_CENTEC)_DEPENDS += $(SYNCD_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/centec/docker-syncd-centec/Dockerfile.j2 b/platform/centec/docker-syncd-centec/Dockerfile.j2 index d5b9bc73eeca..b40103a24f28 100755 --- a/platform/centec/docker-syncd-centec/Dockerfile.j2 +++ b/platform/centec/docker-syncd-centec/Dockerfile.j2 @@ -24,6 +24,8 @@ RUN apt-get install -f kmod COPY ["start.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] ## Clean up RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y diff --git a/platform/centec/docker-syncd-centec/critical_processes b/platform/centec/docker-syncd-centec/critical_processes new file mode 100644 index 000000000000..6082f242b872 --- /dev/null +++ b/platform/centec/docker-syncd-centec/critical_processes @@ -0,0 +1 @@ +syncd diff --git a/platform/centec/docker-syncd-centec/supervisord.conf b/platform/centec/docker-syncd-centec/supervisord.conf index 1af5d70a1d0c..c823ab5680ef 100644 --- a/platform/centec/docker-syncd-centec/supervisord.conf +++ b/platform/centec/docker-syncd-centec/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 @@ -15,7 +21,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=false +autorestart=unexpected stdout_logfile=syslog stderr_logfile=syslog diff --git a/platform/marvell-arm64/docker-syncd-mrvl-rpc.mk b/platform/marvell-arm64/docker-syncd-mrvl-rpc.mk index f5eb6d2de272..0e1b65f2fd5d 100644 --- a/platform/marvell-arm64/docker-syncd-mrvl-rpc.mk +++ b/platform/marvell-arm64/docker-syncd-mrvl-rpc.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_MRVL_RPC = docker-syncd-mrvl-rpc.gz $(DOCKER_SYNCD_MRVL_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-mrvl-rpc $(DOCKER_SYNCD_MRVL_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) +$(DOCKER_SYNCD_MRVL_RPC)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_MRVL_RPC)_DEPENDS += $(SYNCD_RPC_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/marvell-arm64/docker-syncd-mrvl/Dockerfile.j2 b/platform/marvell-arm64/docker-syncd-mrvl/Dockerfile.j2 index 201d52284361..410911e6a4f8 100755 --- a/platform/marvell-arm64/docker-syncd-mrvl/Dockerfile.j2 +++ b/platform/marvell-arm64/docker-syncd-mrvl/Dockerfile.j2 @@ -28,6 +28,8 @@ debs/{{ deb }}{{' '}} COPY ["start.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] ## Clean up RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y diff --git a/platform/marvell-arm64/docker-syncd-mrvl/critical_processes b/platform/marvell-arm64/docker-syncd-mrvl/critical_processes new file mode 100644 index 000000000000..6082f242b872 --- /dev/null +++ b/platform/marvell-arm64/docker-syncd-mrvl/critical_processes @@ -0,0 +1 @@ +syncd diff --git a/platform/marvell-armhf/docker-syncd-mrvl-rpc.mk b/platform/marvell-armhf/docker-syncd-mrvl-rpc.mk index f5eb6d2de272..0e1b65f2fd5d 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl-rpc.mk +++ b/platform/marvell-armhf/docker-syncd-mrvl-rpc.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_MRVL_RPC = docker-syncd-mrvl-rpc.gz $(DOCKER_SYNCD_MRVL_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-mrvl-rpc $(DOCKER_SYNCD_MRVL_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) +$(DOCKER_SYNCD_MRVL_RPC)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_MRVL_RPC)_DEPENDS += $(SYNCD_RPC_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/marvell-armhf/docker-syncd-mrvl/Dockerfile.j2 b/platform/marvell-armhf/docker-syncd-mrvl/Dockerfile.j2 index 201d52284361..410911e6a4f8 100755 --- a/platform/marvell-armhf/docker-syncd-mrvl/Dockerfile.j2 +++ b/platform/marvell-armhf/docker-syncd-mrvl/Dockerfile.j2 @@ -28,6 +28,8 @@ debs/{{ deb }}{{' '}} COPY ["start.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] ## Clean up RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y diff --git a/platform/marvell-armhf/docker-syncd-mrvl/critical_processes b/platform/marvell-armhf/docker-syncd-mrvl/critical_processes new file mode 100644 index 000000000000..6082f242b872 --- /dev/null +++ b/platform/marvell-armhf/docker-syncd-mrvl/critical_processes @@ -0,0 +1 @@ +syncd diff --git a/platform/marvell/docker-syncd-mrvl-rpc.mk b/platform/marvell/docker-syncd-mrvl-rpc.mk index 375c6ba91cd1..5b1968bde39c 100644 --- a/platform/marvell/docker-syncd-mrvl-rpc.mk +++ b/platform/marvell/docker-syncd-mrvl-rpc.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_MRVL_RPC = docker-syncd-mrvl-rpc.gz $(DOCKER_SYNCD_MRVL_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-mrvl-rpc $(DOCKER_SYNCD_MRVL_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) +$(DOCKER_SYNCD_MRVL_RPC)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_MRVL_RPC)_DEPENDS += $(SYNCD_RPC_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/marvell/docker-syncd-mrvl/Dockerfile.j2 b/platform/marvell/docker-syncd-mrvl/Dockerfile.j2 index 889971652c61..40973653e8d3 100755 --- a/platform/marvell/docker-syncd-mrvl/Dockerfile.j2 +++ b/platform/marvell/docker-syncd-mrvl/Dockerfile.j2 @@ -23,6 +23,8 @@ debs/{{ deb }}{{' '}} COPY ["start.sh", "syncd.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] ## Clean up RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y diff --git a/platform/marvell/docker-syncd-mrvl/critical_processes b/platform/marvell/docker-syncd-mrvl/critical_processes new file mode 100644 index 000000000000..6082f242b872 --- /dev/null +++ b/platform/marvell/docker-syncd-mrvl/critical_processes @@ -0,0 +1 @@ +syncd diff --git a/platform/marvell/docker-syncd-mrvl/supervisord.conf b/platform/marvell/docker-syncd-mrvl/supervisord.conf index 1e015fef931f..aea4d45b9afd 100644 --- a/platform/marvell/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell/docker-syncd-mrvl/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 @@ -15,7 +21,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=false +autorestart=unexpected stdout_logfile=syslog stderr_logfile=syslog diff --git a/platform/mellanox/docker-syncd-mlnx-rpc.mk b/platform/mellanox/docker-syncd-mlnx-rpc.mk index f154f8c28d2e..608c1bb3ad20 100644 --- a/platform/mellanox/docker-syncd-mlnx-rpc.mk +++ b/platform/mellanox/docker-syncd-mlnx-rpc.mk @@ -3,6 +3,7 @@ DOCKER_SYNCD_MLNX_RPC = docker-syncd-mlnx-rpc.gz $(DOCKER_SYNCD_MLNX_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-mlnx-rpc $(DOCKER_SYNCD_MLNX_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) +$(DOCKER_SYNCD_MLNX_RPC)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) ifeq ($(INSTALL_DEBUG_TOOLS), y) $(DOCKER_SYNCD_MLNX_RPC)_DEPENDS += $(SYNCD_RPC_DBG) \ $(LIBSWSSCOMMON_DBG) \ diff --git a/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 b/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 index d3716dffcbbc..4d22335ec783 100755 --- a/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 +++ b/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 @@ -35,5 +35,7 @@ RUN apt-get clean -y && \ COPY ["start.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] ENTRYPOINT ["/usr/bin/supervisord"] diff --git a/platform/mellanox/docker-syncd-mlnx/critical_processes b/platform/mellanox/docker-syncd-mlnx/critical_processes new file mode 100644 index 000000000000..6082f242b872 --- /dev/null +++ b/platform/mellanox/docker-syncd-mlnx/critical_processes @@ -0,0 +1 @@ +syncd diff --git a/platform/mellanox/docker-syncd-mlnx/supervisord.conf b/platform/mellanox/docker-syncd-mlnx/supervisord.conf index 1af5d70a1d0c..c823ab5680ef 100644 --- a/platform/mellanox/docker-syncd-mlnx/supervisord.conf +++ b/platform/mellanox/docker-syncd-mlnx/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 @@ -15,7 +21,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=false +autorestart=unexpected stdout_logfile=syslog stderr_logfile=syslog diff --git a/platform/nephos/docker-syncd-nephos/Dockerfile.j2 b/platform/nephos/docker-syncd-nephos/Dockerfile.j2 index b889200f6ca4..f891ace1e8e4 100755 --- a/platform/nephos/docker-syncd-nephos/Dockerfile.j2 +++ b/platform/nephos/docker-syncd-nephos/Dockerfile.j2 @@ -36,6 +36,8 @@ COPY ["files/dsserve", "files/npx_diag", "start.sh", "/usr/bin/"] RUN chmod +x /usr/bin/npx_diag /usr/bin/dsserve COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor/"] ## Clean up RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y diff --git a/platform/nephos/docker-syncd-nephos/critical_processes b/platform/nephos/docker-syncd-nephos/critical_processes new file mode 100644 index 000000000000..489668a89e08 --- /dev/null +++ b/platform/nephos/docker-syncd-nephos/critical_processes @@ -0,0 +1,2 @@ +dsserve +syncd diff --git a/platform/nephos/docker-syncd-nephos/supervisord.conf b/platform/nephos/docker-syncd-nephos/supervisord.conf index 1af5d70a1d0c..c823ab5680ef 100644 --- a/platform/nephos/docker-syncd-nephos/supervisord.conf +++ b/platform/nephos/docker-syncd-nephos/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 @@ -15,7 +21,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=false +autorestart=unexpected stdout_logfile=syslog stderr_logfile=syslog diff --git a/platform/template/docker-syncd-base.mk b/platform/template/docker-syncd-base.mk index d8776d179a02..cefcf15ebd62 100644 --- a/platform/template/docker-syncd-base.mk +++ b/platform/template/docker-syncd-base.mk @@ -7,6 +7,8 @@ DOCKER_SYNCD_BASE_DBG = $(DOCKER_SYNCD_BASE_STEM)-$(DBG_IMAGE_MARK).gz $(DOCKER_SYNCD_BASE)_PATH = $(PLATFORM_PATH)/docker-syncd-$(DOCKER_SYNCD_PLATFORM_CODE) +$(DOCKER_SYNCD_BASE)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) + $(DOCKER_SYNCD_BASE)_LOAD_DOCKERS += $(DOCKER_CONFIG_ENGINE_STRETCH) $(DOCKER_SYNCD_BASE)_DBG_DEPENDS += $($(DOCKER_CONFIG_ENGINE_STRETCH)_DBG_DEPENDS)