Skip to content

Commit

Permalink
Use gnmi container instead of telemetry container
Browse files Browse the repository at this point in the history
  • Loading branch information
ganglyu committed Oct 13, 2023
1 parent 1545579 commit 482a400
Show file tree
Hide file tree
Showing 27 changed files with 315 additions and 84 deletions.
2 changes: 1 addition & 1 deletion Makefile.work
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ SONIC_BUILD_INSTRUCTION := $(MAKE) \
DOCKER_LOCKDIR=$(DOCKER_LOCKDIR) \
DOCKER_LOCKFILE_SAVE=$(DOCKER_LOCKFILE_SAVE) \
SONIC_CONFIG_USE_NATIVE_DOCKERD_FOR_BUILD=$(SONIC_CONFIG_USE_NATIVE_DOCKERD_FOR_BUILD) \
SONIC_INCLUDE_SYSTEM_TELEMETRY=$(INCLUDE_SYSTEM_TELEMETRY) \
SONIC_INCLUDE_SYSTEM_GNMI=$(INCLUDE_SYSTEM_GNMI) \
INCLUDE_DHCP_RELAY=$(INCLUDE_DHCP_RELAY) \
INCLUDE_DHCP_SERVER=$(INCLUDE_DHCP_SERVER) \
INCLUDE_MACSEC=$(INCLUDE_MACSEC) \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,23 @@ ENV IMAGE_VERSION=$image_version

RUN apt-get update

{% if docker_sonic_telemetry_debs.strip() -%}
{% if docker_sonic_gnmi_debs.strip() -%}
# Copy locally-built Debian package dependencies
{{ copy_files("debs/", docker_sonic_telemetry_debs.split(' '), "/debs/") }}
{{ copy_files("debs/", docker_sonic_gnmi_debs.split(' '), "/debs/") }}

# Install locally-built Debian packages and implicitly install their dependencies
{{ install_debian_packages(docker_sonic_telemetry_debs.split(' ')) }}
{{ install_debian_packages(docker_sonic_gnmi_debs.split(' ')) }}
{%- endif %}

# Clean up apt state and remove the locally-built packages that were copied
# into /debs for installation, so they do not end up in the final image layer.
# Every apt-get step is given -y (the original autoclean had a stray bare "-").
RUN apt-get clean -y && \
    apt-get autoclean -y && \
    apt-get autoremove -y && \
    rm -rf /debs

COPY ["start.sh", "telemetry.sh", "dialout.sh", "/usr/bin/"]
COPY ["telemetry_vars.j2", "/usr/share/sonic/templates/"]
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
COPY ["docker_init.sh", "start.sh", "gnmi-native.sh", "telemetry.sh", "dialout.sh", "/usr/bin/"]
COPY ["telemetry_vars.j2.gnmi", "telemetry_vars.j2.telemetry", "/usr/share/sonic/templates/"]
COPY ["supervisord.conf.gnmi", "supervisord.conf.telemetry", "/etc/supervisor/conf.d/"]
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
COPY ["critical_processes", "/etc/supervisor"]
COPY ["critical_processes.gnmi", "critical_processes.telemetry", "/etc/supervisor/"]

ENTRYPOINT ["/usr/local/bin/supervisord"]
ENTRYPOINT ["/usr/bin/docker_init.sh"]
5 changes: 5 additions & 0 deletions dockers/docker-sonic-gnmi/base_image_files/monit_gnmi
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
###############################################################################
## Monit configuration for gnmi container
###############################################################################
# Periodically runs /usr/bin/memory_checker with the arguments "gnmi" and
# 419430400 (= 400 * 1024 * 1024; presumably a memory limit in bytes for the
# gnmi container -- confirm against memory_checker).
# If the program exits with status 3 in 10 out of 20 monit cycles,
# /usr/bin/restart_service is executed for gnmi, and re-executed every
# 2 cycles for as long as the condition persists.
check program container_memory_gnmi with path "/usr/bin/memory_checker gnmi 419430400"
if status == 3 for 10 times within 20 cycles then exec "/usr/bin/restart_service gnmi" repeat every 2 cycles
1 change: 1 addition & 0 deletions dockers/docker-sonic-gnmi/critical_processes.gnmi
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
program:gnmi-native
File renamed without changes.
7 changes: 7 additions & 0 deletions dockers/docker-sonic-gnmi/docker_init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
#
# Container entrypoint: select the per-container variant of each configuration
# file and then hand control over to supervisord.
#
# The image ships "<file>.<name>" variants of the telemetry vars template, the
# supervisord configuration and the critical-process list. CONTAINER_NAME picks
# the variant that is copied to the un-suffixed path the rest of the container
# reads.
#
# Environment:
#   CONTAINER_NAME  suffix of the variant to activate (e.g. gnmi, telemetry)

# Abort on the first failed copy instead of starting supervisord with a
# missing or stale configuration.
set -e

# Fail early with a clear message when CONTAINER_NAME is unset or empty;
# otherwise every cp below would look for a file ending in a bare ".".
: "${CONTAINER_NAME:?CONTAINER_NAME must be set to select the container configuration}"

cp "/usr/share/sonic/templates/telemetry_vars.j2.${CONTAINER_NAME}" /usr/share/sonic/templates/telemetry_vars.j2
cp "/etc/supervisor/conf.d/supervisord.conf.${CONTAINER_NAME}" /etc/supervisor/conf.d/supervisord.conf
cp "/etc/supervisor/critical_processes.${CONTAINER_NAME}" /etc/supervisor/critical_processes

# Replace this shell with supervisord so it becomes the container's main process.
exec /usr/local/bin/supervisord
105 changes: 105 additions & 0 deletions dockers/docker-sonic-gnmi/gnmi-native.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env bash
#
# Build the command line for /usr/sbin/telemetry from ConfigDB and exec it.
#
# The telemetry vars template is rendered with sonic-cfggen; the resulting
# object is split with jq into its "x509", "gnmi" and "certs" members, which
# are translated into server flags (TLS material, port, client auth, log
# level, connection threshold, idle timeout).
#
# Exit codes:
#   1  the telemetry vars template file does not exist
#   2  "threshold" or "idle_conn_duration" is set but is not a decimal integer
#
# All expansions used in tests and pipes are quoted: the jq output for an
# object is multi-word JSON, which would otherwise be word-split (making
# "[ -z ... ]" fail with "too many arguments") and glob-expanded.

EXIT_TELEMETRY_VARS_FILE_NOT_FOUND=1
INCORRECT_TELEMETRY_VALUE=2
TELEMETRY_VARS_FILE=/usr/share/sonic/templates/telemetry_vars.j2

if [ ! -f "$TELEMETRY_VARS_FILE" ]; then
    echo "Telemetry vars template file not found"
    exit $EXIT_TELEMETRY_VARS_FILE_NOT_FOUND
fi

# Try to read telemetry and certs config from ConfigDB.
# Use default value if no valid config exists
TELEMETRY_VARS=$(sonic-cfggen -d -t "$TELEMETRY_VARS_FILE")
# The rendered template uses single quotes; turn them into double quotes so
# that jq can parse the text as JSON.
TELEMETRY_VARS=${TELEMETRY_VARS//[\']/\"}
X509=$(echo "$TELEMETRY_VARS" | jq -r '.x509')
GNMI=$(echo "$TELEMETRY_VARS" | jq -r '.gnmi')
CERTS=$(echo "$TELEMETRY_VARS" | jq -r '.certs')

TELEMETRY_ARGS=" -logtostderr"
export CVL_SCHEMA_PATH=/usr/sbin/schema

# TLS material: "certs" takes precedence over "x509"; with neither present
# the server is started without TLS.
if [ -n "$CERTS" ]; then
    TLS_CONFIG=$CERTS
elif [ -n "$X509" ]; then
    TLS_CONFIG=$X509
else
    TLS_CONFIG=""
fi

if [ -n "$TLS_CONFIG" ]; then
    # NOTE(review): jq -r prints the literal string "null" for an absent key,
    # so the emptiness checks below only catch keys that are present but
    # empty -- confirm whether absent keys should also be treated as unset.
    SERVER_CRT=$(echo "$TLS_CONFIG" | jq -r '.server_crt')
    SERVER_KEY=$(echo "$TLS_CONFIG" | jq -r '.server_key')
    if [ -z "$SERVER_CRT" ] || [ -z "$SERVER_KEY" ]; then
        TELEMETRY_ARGS+=" --insecure"
    else
        TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY "
    fi

    CA_CRT=$(echo "$TLS_CONFIG" | jq -r '.ca_crt')
    if [ -n "$CA_CRT" ]; then
        TELEMETRY_ARGS+=" --ca_crt $CA_CRT"
    fi
else
    TELEMETRY_ARGS+=" --noTLS"
fi

# If no "gnmi" configuration entry exists, fall back to the default port
if [ -z "$GNMI" ]; then
    PORT=8080
else
    PORT=$(echo "$GNMI" | jq -r '.port')
fi
TELEMETRY_ARGS+=" --port $PORT"

# Client authentication is only waived when it is unset/empty or "false".
CLIENT_AUTH=$(echo "$GNMI" | jq -r '.client_auth')
if [ -z "$CLIENT_AUTH" ] || [ "$CLIENT_AUTH" == "false" ]; then
    TELEMETRY_ARGS+=" --allow_no_client_auth"
fi

# Use the configured numeric log level, otherwise verbosity 2.
LOG_LEVEL=$(echo "$GNMI" | jq -r '.log_level')
if [[ $LOG_LEVEL =~ ^[0-9]+$ ]]; then
    TELEMETRY_ARGS+=" -v=$LOG_LEVEL"
else
    TELEMETRY_ARGS+=" -v=2"
fi

# Enable ZMQ for SmartSwitch
# NOTE(review): the hash key queried here is plain "localhost" -- confirm this
# is the intended CONFIG_DB key for the device "subtype" field.
LOCALHOST_SUBTYPE=$(sonic-db-cli CONFIG_DB hget localhost "subtype")
if [[ x"${LOCALHOST_SUBTYPE}" == x"SmartSwitch" ]]; then
    TELEMETRY_ARGS+=" -zmq_address=tcp://127.0.0.1:8100"
fi

# Server will handle threshold connections consecutively
THRESHOLD_CONNECTIONS=$(echo "$GNMI" | jq -r '.threshold')
if [[ $THRESHOLD_CONNECTIONS =~ ^[0-9]+$ ]]; then
    TELEMETRY_ARGS+=" --threshold $THRESHOLD_CONNECTIONS"
elif [ -z "$GNMI" ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then
    # No gnmi entry at all, or the key is absent: use the default.
    TELEMETRY_ARGS+=" --threshold 100"
else
    echo "Incorrect threshold value, expecting positive integers" >&2
    exit $INCORRECT_TELEMETRY_VALUE
fi

# Close idle connections after certain duration (in seconds)
IDLE_CONN_DURATION=$(echo "$GNMI" | jq -r '.idle_conn_duration')
if [[ $IDLE_CONN_DURATION =~ ^[0-9]+$ ]]; then
    TELEMETRY_ARGS+=" --idle_conn_duration $IDLE_CONN_DURATION"
elif [ -z "$GNMI" ] || [[ $IDLE_CONN_DURATION == "null" ]]; then
    # No gnmi entry at all, or the key is absent: use the default.
    TELEMETRY_ARGS+=" --idle_conn_duration 5"
else
    echo "Incorrect idle_conn_duration value, expecting positive integers" >&2
    exit $INCORRECT_TELEMETRY_VALUE
fi

# TELEMETRY_ARGS is deliberately left unquoted: it is a space-separated list
# of flags that must be word-split into individual arguments.
exec /usr/sbin/telemetry ${TELEMETRY_ARGS}
2 changes: 1 addition & 1 deletion dockers/docker-sonic-telemetry/start.sh → dockers/docker-sonic-gnmi/start.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ fi
CTR_SCRIPT="/usr/share/sonic/scripts/container_startup.py"
if test -f ${CTR_SCRIPT}
then
${CTR_SCRIPT} -f telemetry -o ${RUNTIME_OWNER} -v ${IMAGE_VERSION}
${CTR_SCRIPT} -f ${CONTAINER_NAME} -o ${RUNTIME_OWNER} -v ${IMAGE_VERSION}
fi

mkdir -p /var/sonic
Expand Down
60 changes: 60 additions & 0 deletions dockers/docker-sonic-gnmi/supervisord.conf.gnmi
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; supervisord configuration for the gnmi container.
;
; Every program below has autostart=false and dependent_startup=true, and the
; dependent_startup_wait_for keys describe this start order:
;   rsyslogd -> start (once rsyslogd is running)
;            -> gnmi-native (once start has exited)
;            -> dialout (once gnmi-native is running)
; NOTE(review): presumably the dependent-startup event listener is what acts
; on these keys -- confirm against supervisord_dependent_startup.
[supervisord]
logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

; Event listener running the supervisord_dependent_startup Python module;
; it receives PROCESS_STATE events. Exit codes 0 and 3 count as expected.
[eventlistener:dependent-startup]
command=python3 -m supervisord_dependent_startup
autostart=true
autorestart=unexpected
startretries=0
exitcodes=0,3
events=PROCESS_STATE
buffer_size=1024

; Event listener notified when a process exits or reaches the running state;
; it is told that it runs inside the container named "gnmi".
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name gnmi
events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING
autostart=true
autorestart=false
buffer_size=1024

; Syslog daemon; first in the chain so the programs below can log to syslog.
[program:rsyslogd]
command=/usr/sbin/rsyslogd -n -iNONE
priority=1
autostart=false
autorestart=true
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true

; One-shot start script (startsecs=0, no restart); waits for rsyslogd.
[program:start]
command=/usr/bin/start.sh
priority=2
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=rsyslogd:running

; The gNMI server wrapper script; started after start.sh has exited.
[program:gnmi-native]
command=/usr/bin/gnmi-native.sh
priority=3
autostart=false
autorestart=false
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=start:exited

; Dial-out script; started once gnmi-native is running.
[program:dialout]
command=/usr/bin/dialout.sh
priority=4
autostart=false
autorestart=false
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=gnmi-native:running
8 changes: 1 addition & 7 deletions dockers/docker-sonic-telemetry/telemetry.sh → dockers/docker-sonic-gnmi/telemetry.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,6 @@ else
TELEMETRY_ARGS+=" -v=2"
fi

# Enable ZMQ for SmartSwitch
LOCALHOST_SUBTYPE=`sonic-db-cli CONFIG_DB hget localhost "subtype"`
if [[ x"${LOCALHOST_SUBTYPE}" == x"SmartSwitch" ]]; then
TELEMETRY_ARGS+=" -zmq_address=tcp://127.0.0.1:8100"
fi

# Server will handle threshold connections consecutively
THRESHOLD_CONNECTIONS=$(echo $GNMI | jq -r '.threshold')
if [[ $THRESHOLD_CONNECTIONS =~ ^[0-9]+$ ]]; then
Expand All @@ -101,6 +95,6 @@ else
exit $INCORRECT_TELEMETRY_VALUE
fi
fi

TELEMETRY_ARGS+=" -gnmi_native_write=false"

exec /usr/sbin/telemetry ${TELEMETRY_ARGS}
5 changes: 5 additions & 0 deletions dockers/docker-sonic-gnmi/telemetry_vars.j2.gnmi
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{# Renders an object with three members for the gnmi container:
   "certs" and "gnmi" are taken from the GNMI table when those keys exist,
   "x509" is taken from DEVICE_METADATA when that key exists; each member
   falls back to an empty string otherwise.
   NOTE(review): GNMI and DEVICE_METADATA are dereferenced unconditionally;
   confirm what the renderer does when either table is absent.
   The trailing "-" on this comment trims the following newline so the
   rendered output is unchanged. -#}
{
"certs": {% if "certs" in GNMI.keys() %}{{ GNMI["certs"] }}{% else %}""{% endif %},
"gnmi" : {% if "gnmi" in GNMI.keys() %}{{ GNMI["gnmi"] }}{% else %}""{% endif %},
"x509" : {% if "x509" in DEVICE_METADATA.keys() %}{{ DEVICE_METADATA["x509"] }}{% else %}""{% endif %}
}
7 changes: 6 additions & 1 deletion files/build_templates/docker_image_ctl.j2
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,12 @@ kill() {

DOCKERNAME={{docker_container_name}}
OP=$1
DEV=$2 # namespace/device number to operate on
{%- if docker_container_name == "gnmi" %}
DOCKERNAME=$2 # Support gnmi and telemetry container
DEV=$3 # namespace/device number to operate on
{%- else %}
DEV=$2 # namespace/device number to operate on
{%- endif %}
{%- if docker_container_name == "database" %}
if [ "$DEV" == "chassisdb" ]; then
DATABASE_TYPE="chassisdb"
Expand Down
16 changes: 16 additions & 0 deletions files/build_templates/gnmi.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# systemd unit template for the GNMI container.
#
# Dependencies and ordering:
#   - requires database.service;
#   - starts after database, swss and syncd services and after sonic.target;
#   - starts before ntp-config.service;
#   - is bound to sonic.target (BindsTo).
# Start rate limiting: at most 3 starts within 1200 seconds.
[Unit]
Description=GNMI container
Requires=database.service
After=database.service swss.service syncd.service
Before=ntp-config.service
BindsTo=sonic.target
After=sonic.target
StartLimitIntervalSec=1200
StartLimitBurst=3

# The service runs as the templated sonicadmin user and drives the container
# through /usr/local/bin/gnmi.sh: "start" before the main process, "wait" as
# the main process, "stop" on shutdown.
# NOTE(review): RestartSec is set but no Restart= directive is visible in this
# unit -- confirm that restarting is enabled elsewhere, otherwise RestartSec
# has nothing to apply to.
[Service]
User={{ sonicadmin_user }}
ExecStartPre=/usr/local/bin/gnmi.sh start
ExecStart=/usr/local/bin/gnmi.sh wait
ExecStop=/usr/local/bin/gnmi.sh stop
RestartSec=30
2 changes: 1 addition & 1 deletion files/build_templates/init_cfg.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
{%- if include_restapi == "y" %}{% do features.append(("restapi", "enabled", false, "enabled")) %}{% endif %}
{%- if include_sflow == "y" %}{% do features.append(("sflow", "disabled", true, "enabled")) %}{% endif %}
{%- if include_macsec == "y" %}{% do features.append(("macsec", "{% if 'type' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['type'] == 'SpineRouter' and DEVICE_RUNTIME_METADATA['MACSEC_SUPPORTED'] %}enabled{% else %}disabled{% endif %}", false, "enabled")) %}{% endif %}
{%- if include_system_telemetry == "y" %}{% do features.append(("telemetry", "enabled", true, "enabled")) %}{% endif %}
{%- if include_system_gnmi == "y" %}{% do features.append(("gnmi", "enabled", true, "enabled")) %}{% do features.append(("telemetry", "disabled", false, "enabled")) %}{% endif %}
"FEATURE": {
{# delayed field if set, will start the feature systemd .timer unit instead of .service unit #}
{%- for feature, state, delayed, autorestart in features %}
Expand Down
6 changes: 3 additions & 3 deletions files/build_templates/telemetry.service.j2
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ StartLimitBurst=3

[Service]
User={{ sonicadmin_user }}
ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start
ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop
ExecStartPre=/usr/local/bin/telemetry.sh start
ExecStart=/usr/local/bin/telemetry.sh wait
ExecStop=/usr/local/bin/telemetry.sh stop
RestartSec=30
24 changes: 20 additions & 4 deletions files/scripts/service_mgmt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,20 @@ start() {
debug "Starting ${SERVICE}$DEV service..."

# start service docker
/usr/bin/${SERVICE}.sh start $DEV
if [ x"$SERVICE" == x"gnmi" ] || [ x"$SERVICE" == x"telemetry" ]; then
/usr/bin/gnmi.sh start $SERVICE $DEV
else
/usr/bin/${SERVICE}.sh start $DEV
fi
debug "Started ${SERVICE}$DEV service..."
}

wait() {
/usr/bin/${SERVICE}.sh wait $DEV
if [ x"$SERVICE" == x"gnmi" ] || [ x"$SERVICE" == x"telemetry" ]; then
/usr/bin/gnmi.sh wait $SERVICE $DEV
else
/usr/bin/${SERVICE}.sh wait $DEV
fi
}

stop() {
Expand All @@ -74,12 +82,20 @@ stop() {
debug "Killing Docker ${SERVICE}${DEV} for active-active dualtor device..."
/usr/bin/${SERVICE}.sh kill $DEV
else
/usr/bin/${SERVICE}.sh stop $DEV
if [ x"$SERVICE" == x"gnmi" ] || [ x"$SERVICE" == x"telemetry" ]; then
/usr/bin/gnmi.sh stop $SERVICE $DEV
else
/usr/bin/${SERVICE}.sh stop $DEV
fi
debug "Stopped ${SERVICE}$DEV service..."
fi
else
debug "Killing Docker ${SERVICE}${DEV}..."
/usr/bin/${SERVICE}.sh kill $DEV
if [ x"$SERVICE" == x"gnmi" ] || [ x"$SERVICE" == x"telemetry" ]; then
/usr/bin/gnmi.sh kill $SERVICE $DEV
else
/usr/bin/${SERVICE}.sh kill $DEV
fi
fi
}

Expand Down
7 changes: 3 additions & 4 deletions rules/config
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ SONIC_DPKG_CACHE_SOURCE ?= /var/cache/sonic/artifacts
DEFAULT_VS_PREPARE_MEM = yes


# INCLUDE_SYSTEM_TELEMETRY - build docker-sonic-telemetry for system telemetry support
INCLUDE_SYSTEM_TELEMETRY = y
# INCLUDE_SYSTEM_GNMI - build docker-sonic-gnmi for system gnmi support
INCLUDE_SYSTEM_GNMI = y

# INCLUDE_ICCPD - build docker-iccpd for mclag support
INCLUDE_ICCPD = n
Expand All @@ -138,7 +138,7 @@ INCLUDE_SFLOW = y
INCLUDE_MGMT_FRAMEWORK = y

# ENABLE_HOST_SERVICE_ON_START - enable sonic-host-server for mgmt-framework and/or
# telemetry containers to access host functionality by default
# gnmi containers to access host functionality by default
ENABLE_HOST_SERVICE_ON_START = y

# INCLUDE_RESTAPI - build docker-sonic-restapi for configuring the switch using REST APIs
Expand All @@ -164,7 +164,6 @@ ENABLE_AUTO_TECH_SUPPORT = y
# ENABLE_TRANSLIB_WRITE = y

# ENABLE_NATIVE_WRITE - Enable native write/config operations via the gNMI interface.
# Uncomment to enable:
ENABLE_NATIVE_WRITE = y

# INCLUDE_MACSEC - build docker-macsec for macsec support
Expand Down
11 changes: 11 additions & 0 deletions rules/docker-gnmi.dep
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

# Dependency description for the docker-gnmi image: records the files and
# flags associated with $(DOCKER_GNMI) and the cache mode to use for it.

# Source directory of the image, as registered under $(DOCKER_GNMI)_PATH.
DPATH := $($(DOCKER_GNMI)_PATH)

# Common file lists, this image's own rule files, and every git-tracked file
# below DPATH. Simple (:=) assignment so git ls-files runs exactly once.
DEP_FILES := $(SONIC_COMMON_FILES_LIST) rules/docker-gnmi.mk rules/docker-gnmi.dep \
             $(SONIC_COMMON_BASE_FILES_LIST) \
             $(shell git ls-files $(DPATH))

$(DOCKER_GNMI)_DEP_FILES := $(DEP_FILES)
$(DOCKER_GNMI)_DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST)
$(DOCKER_GNMI)_CACHE_MODE := GIT_CONTENT_SHA

# Expand the add_dbg_docker template for the image and its _DBG counterpart.
$(eval $(call add_dbg_docker,$(DOCKER_GNMI),$(DOCKER_GNMI_DBG)))
Loading

0 comments on commit 482a400

Please sign in to comment.