Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Share image for gnmi and telemetry #16863

Merged
merged 11 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile.work
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ SONIC_BUILD_INSTRUCTION := $(MAKE) \
DOCKER_LOCKFILE_SAVE=$(DOCKER_LOCKFILE_SAVE) \
SONIC_CONFIG_USE_NATIVE_DOCKERD_FOR_BUILD=$(SONIC_CONFIG_USE_NATIVE_DOCKERD_FOR_BUILD) \
SONIC_INCLUDE_SYSTEM_TELEMETRY=$(INCLUDE_SYSTEM_TELEMETRY) \
SONIC_INCLUDE_SYSTEM_GNMI=$(INCLUDE_SYSTEM_GNMI) \
INCLUDE_DHCP_RELAY=$(INCLUDE_DHCP_RELAY) \
INCLUDE_DHCP_SERVER=$(INCLUDE_DHCP_SERVER) \
INCLUDE_MACSEC=$(INCLUDE_MACSEC) \
Expand Down
34 changes: 34 additions & 0 deletions dockers/docker-sonic-gnmi/Dockerfile.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %}
FROM docker-config-engine-bullseye-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}

ARG docker_container_name
ARG image_version

## Make apt-get non-interactive
ENV DEBIAN_FRONTEND=noninteractive

# Pass the image_version to container
ENV IMAGE_VERSION=$image_version

RUN apt-get update

{% if docker_sonic_gnmi_debs.strip() -%}
# Copy locally-built Debian package dependencies
{{ copy_files("debs/", docker_sonic_gnmi_debs.split(' '), "/debs/") }}

# Install locally-built Debian packages and implicitly install their dependencies
{{ install_debian_packages(docker_sonic_gnmi_debs.split(' ')) }}
{%- endif %}

RUN apt-get clean -y && \
apt-get autoclean - && \
apt-get autoremove -y && \
rm -rf /debs

COPY ["start.sh", "gnmi-native.sh", "dialout.sh", "/usr/bin/"]
COPY ["telemetry_vars.j2", "/usr/share/sonic/templates/"]
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
COPY ["critical_processes", "/etc/supervisor"]

ENTRYPOINT ["/usr/local/bin/supervisord"]
5 changes: 5 additions & 0 deletions dockers/docker-sonic-gnmi/base_image_files/monit_gnmi
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
###############################################################################
## Monit configuration for telemetry container
###############################################################################
check program container_memory_gnmi with path "/usr/bin/memory_checker gnmi 419430400"
if status == 3 for 10 times within 20 cycles then exec "/usr/bin/restart_service gnmi" repeat every 2 cycles
1 change: 1 addition & 0 deletions dockers/docker-sonic-gnmi/critical_processes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
program:gnmi-native
6 changes: 6 additions & 0 deletions dockers/docker-sonic-gnmi/dialout.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash

# Start with default config
export CVL_SCHEMA_PATH=/usr/sbin/schema
exec /usr/sbin/dialout_client_cli -insecure -logtostderr -v 2

105 changes: 105 additions & 0 deletions dockers/docker-sonic-gnmi/gnmi-native.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env bash

EXIT_TELEMETRY_VARS_FILE_NOT_FOUND=1
INCORRECT_TELEMETRY_VALUE=2
TELEMETRY_VARS_FILE=/usr/share/sonic/templates/telemetry_vars.j2

if [ ! -f "$TELEMETRY_VARS_FILE" ]; then
echo "Telemetry vars template file not found"
exit $EXIT_TELEMETRY_VARS_FILE_NOT_FOUND
fi

# Try to read telemetry and certs config from ConfigDB.
# Use default value if no valid config exists
TELEMETRY_VARS=$(sonic-cfggen -d -t $TELEMETRY_VARS_FILE)
TELEMETRY_VARS=${TELEMETRY_VARS//[\']/\"}
X509=$(echo $TELEMETRY_VARS | jq -r '.x509')
GNMI=$(echo $TELEMETRY_VARS | jq -r '.gnmi')
CERTS=$(echo $TELEMETRY_VARS | jq -r '.certs')

TELEMETRY_ARGS=" -logtostderr"
export CVL_SCHEMA_PATH=/usr/sbin/schema

if [ -n "$CERTS" ]; then
SERVER_CRT=$(echo $CERTS | jq -r '.server_crt')
SERVER_KEY=$(echo $CERTS | jq -r '.server_key')
if [ -z $SERVER_CRT ] || [ -z $SERVER_KEY ]; then
TELEMETRY_ARGS+=" --insecure"
else
TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY "
fi

CA_CRT=$(echo $CERTS | jq -r '.ca_crt')
if [ ! -z $CA_CRT ]; then
TELEMETRY_ARGS+=" --ca_crt $CA_CRT"
fi
elif [ -n "$X509" ]; then
SERVER_CRT=$(echo $X509 | jq -r '.server_crt')
SERVER_KEY=$(echo $X509 | jq -r '.server_key')
if [ -z $SERVER_CRT ] || [ -z $SERVER_KEY ]; then
TELEMETRY_ARGS+=" --insecure"
else
TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY "
fi

CA_CRT=$(echo $X509 | jq -r '.ca_crt')
if [ ! -z $CA_CRT ]; then
TELEMETRY_ARGS+=" --ca_crt $CA_CRT"
fi
else
TELEMETRY_ARGS+=" --noTLS"
fi

# If no configuration entry exists for TELEMETRY, create one default port
if [ -z "$GNMI" ]; then
PORT=8080
else
PORT=$(echo $GNMI | jq -r '.port')
fi
TELEMETRY_ARGS+=" --port $PORT"

CLIENT_AUTH=$(echo $GNMI | jq -r '.client_auth')
if [ -z $CLIENT_AUTH ] || [ $CLIENT_AUTH == "false" ]; then
TELEMETRY_ARGS+=" --allow_no_client_auth"
fi

LOG_LEVEL=$(echo $GNMI | jq -r '.log_level')
if [[ $LOG_LEVEL =~ ^[0-9]+$ ]]; then
TELEMETRY_ARGS+=" -v=$LOG_LEVEL"
else
TELEMETRY_ARGS+=" -v=2"
fi

# Enable ZMQ for SmartSwitch
LOCALHOST_SUBTYPE=`sonic-db-cli CONFIG_DB hget localhost "subtype"`
if [[ x"${LOCALHOST_SUBTYPE}" == x"SmartSwitch" ]]; then
TELEMETRY_ARGS+=" -zmq_address=tcp://127.0.0.1:8100"
fi

# Server will handle threshold connections consecutively
THRESHOLD_CONNECTIONS=$(echo $GNMI | jq -r '.threshold')
if [[ $THRESHOLD_CONNECTIONS =~ ^[0-9]+$ ]]; then
TELEMETRY_ARGS+=" --threshold $THRESHOLD_CONNECTIONS"
else
if [ -z "$GNMI" ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then
TELEMETRY_ARGS+=" --threshold 100"
else
echo "Incorrect threshold value, expecting positive integers" >&2
exit $INCORRECT_TELEMETRY_VALUE
fi
fi

# Close idle connections after certain duration (in seconds)
IDLE_CONN_DURATION=$(echo $GNMI | jq -r '.idle_conn_duration')
if [[ $IDLE_CONN_DURATION =~ ^[0-9]+$ ]]; then
TELEMETRY_ARGS+=" --idle_conn_duration $IDLE_CONN_DURATION"
else
if [ -z "$GNMI" ] || [[ $IDLE_CONN_DURATION == "null" ]]; then
TELEMETRY_ARGS+=" --idle_conn_duration 5"
else
echo "Incorrect idle_conn_duration value, expecting positive integers" >&2
exit $INCORRECT_TELEMETRY_VALUE
fi
fi

exec /usr/sbin/telemetry ${TELEMETRY_ARGS}
18 changes: 18 additions & 0 deletions dockers/docker-sonic-gnmi/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

if [ "${RUNTIME_OWNER}" == "" ]; then
RUNTIME_OWNER="kube"
fi

CTR_SCRIPT="/usr/share/sonic/scripts/container_startup.py"
if test -f ${CTR_SCRIPT}
then
${CTR_SCRIPT} -f gnmi -o ${RUNTIME_OWNER} -v ${IMAGE_VERSION}
fi

mkdir -p /var/sonic
echo "# Config files managed by sonic-config-engine" > /var/sonic/config_status

TZ=$(cat /etc/timezone)
rm -rf /etc/localtime
ln -sf /usr/share/zoneinfo/$TZ /etc/localtime
60 changes: 60 additions & 0 deletions dockers/docker-sonic-gnmi/supervisord.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
[supervisord]
logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

[eventlistener:dependent-startup]
command=python3 -m supervisord_dependent_startup
autostart=true
autorestart=unexpected
startretries=0
exitcodes=0,3
events=PROCESS_STATE
buffer_size=1024

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name gnmi
events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING
autostart=true
autorestart=false
buffer_size=1024

[program:rsyslogd]
command=/usr/sbin/rsyslogd -n -iNONE
priority=1
autostart=false
autorestart=true
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true

[program:start]
command=/usr/bin/start.sh
priority=2
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=rsyslogd:running

[program:gnmi-native]
command=/usr/bin/gnmi-native.sh
priority=3
autostart=false
autorestart=false
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=start:exited

[program:dialout]
command=/usr/bin/dialout.sh
priority=4
autostart=false
autorestart=false
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=gnmi-native:running
5 changes: 5 additions & 0 deletions dockers/docker-sonic-gnmi/telemetry_vars.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have we already added GNMI table to config_db?

"certs": {% if "certs" in GNMI.keys() %}{{ GNMI["certs"] }}{% else %}""{% endif %},
"gnmi" : {% if "gnmi" in GNMI.keys() %}{{ GNMI["gnmi"] }}{% else %}""{% endif %},
"x509" : {% if "x509" in DEVICE_METADATA.keys() %}{{ DEVICE_METADATA["x509"] }}{% else %}""{% endif %}
}
3 changes: 1 addition & 2 deletions dockers/docker-sonic-telemetry/Dockerfile.j2
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %}
FROM docker-config-engine-bullseye-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}
FROM docker-sonic-gnmi-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}

ARG docker_container_name
ARG image_version
Expand Down Expand Up @@ -28,7 +28,6 @@ RUN apt-get clean -y && \
COPY ["start.sh", "telemetry.sh", "dialout.sh", "/usr/bin/"]
COPY ["telemetry_vars.j2", "/usr/share/sonic/templates/"]
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
COPY ["critical_processes", "/etc/supervisor"]

ENTRYPOINT ["/usr/local/bin/supervisord"]
12 changes: 3 additions & 9 deletions dockers/docker-sonic-telemetry/telemetry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,12 @@ else
TELEMETRY_ARGS+=" -v=2"
fi

# Enable ZMQ for SmartSwitch
LOCALHOST_SUBTYPE=`sonic-db-cli CONFIG_DB hget localhost "subtype"`
if [[ x"${LOCALHOST_SUBTYPE}" == x"SmartSwitch" ]]; then
TELEMETRY_ARGS+=" -zmq_address=tcp://127.0.0.1:8100"
fi

# Server will handle threshold connections consecutively
THRESHOLD_CONNECTIONS=$(echo $GNMI | jq -r '.threshold')
if [[ $THRESHOLD_CONNECTIONS =~ ^[0-9]+$ ]]; then
TELEMETRY_ARGS+=" --threshold $THRESHOLD_CONNECTIONS"
else
if [ -z $GNMI ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then
if [ -z "$GNMI" ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then
TELEMETRY_ARGS+=" --threshold 100"
else
echo "Incorrect threshold value, expecting positive integers" >&2
Expand All @@ -94,13 +88,13 @@ IDLE_CONN_DURATION=$(echo $GNMI | jq -r '.idle_conn_duration')
if [[ $IDLE_CONN_DURATION =~ ^[0-9]+$ ]]; then
TELEMETRY_ARGS+=" --idle_conn_duration $IDLE_CONN_DURATION"
else
if [ -z $GNMI ] || [[ $IDLE_CONN_DURATION == "null" ]]; then
if [ -z "$GNMI" ] || [[ $IDLE_CONN_DURATION == "null" ]]; then
TELEMETRY_ARGS+=" --idle_conn_duration 5"
else
echo "Incorrect idle_conn_duration value, expecting positive integers" >&2
exit $INCORRECT_TELEMETRY_VALUE
fi
fi

TELEMETRY_ARGS+=" -gnmi_native_write=false"

exec /usr/sbin/telemetry ${TELEMETRY_ARGS}
16 changes: 16 additions & 0 deletions files/build_templates/gnmi.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[Unit]
Description=GNMI container
Requires=database.service
After=database.service swss.service syncd.service
Before=ntp-config.service
BindsTo=sonic.target
After=sonic.target
StartLimitIntervalSec=1200
StartLimitBurst=3

[Service]
User={{ sonicadmin_user }}
ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start
ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop
RestartSec=30
3 changes: 2 additions & 1 deletion files/build_templates/init_cfg.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
{%- if include_restapi == "y" %}{% do features.append(("restapi", "enabled", false, "enabled")) %}{% endif %}
{%- if include_sflow == "y" %}{% do features.append(("sflow", "disabled", true, "enabled")) %}{% endif %}
{%- if include_macsec == "y" %}{% do features.append(("macsec", "{% if 'type' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['type'] == 'SpineRouter' and DEVICE_RUNTIME_METADATA['MACSEC_SUPPORTED'] %}enabled{% else %}disabled{% endif %}", false, "enabled")) %}{% endif %}
{%- if include_system_gnmi == "y" %}{% do features.append(("gnmi", "enabled", true, "enabled")) %}{% endif %}
{%- if include_system_telemetry == "y" %}{% do features.append(("telemetry", "enabled", true, "enabled")) %}{% endif %}
"FEATURE": {
{# delayed field if set, will start the feature systemd .timer unit instead of .service unit #}
Expand All @@ -76,7 +77,7 @@
"check_up_status" : "false",
{%- endif %}
{%- if include_kubernetes == "y" %}
{%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry"] %}
{%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry", "gnmi"] %}
"set_owner": "kube", {% else %}
"set_owner": "local", {% endif %} {% endif %}
"high_mem_alert": "disabled"
Expand Down
1 change: 1 addition & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,7 @@ sudo LANG=C cp $SCRIPTS_DIR/radv.sh $FILESYSTEM_ROOT/usr/local/bin/radv.sh
sudo LANG=C cp $SCRIPTS_DIR/database.sh $FILESYSTEM_ROOT/usr/local/bin/database.sh
sudo LANG=C cp $SCRIPTS_DIR/snmp.sh $FILESYSTEM_ROOT/usr/local/bin/snmp.sh
sudo LANG=C cp $SCRIPTS_DIR/telemetry.sh $FILESYSTEM_ROOT/usr/local/bin/telemetry.sh
sudo LANG=C cp $SCRIPTS_DIR/gnmi.sh $FILESYSTEM_ROOT/usr/local/bin/gnmi.sh
sudo LANG=C cp $SCRIPTS_DIR/mgmt-framework.sh $FILESYSTEM_ROOT/usr/local/bin/mgmt-framework.sh
sudo LANG=C cp $SCRIPTS_DIR/asic_status.sh $FILESYSTEM_ROOT/usr/local/bin/asic_status.sh
sudo LANG=C cp $SCRIPTS_DIR/asic_status.py $FILESYSTEM_ROOT/usr/local/bin/asic_status.py
Expand Down
1 change: 1 addition & 0 deletions files/image_config/logrotate/rsyslog.j2
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
/var/log/syslog
/var/log/teamd.log
/var/log/telemetry.log
/var/log/gnmi.log
/var/log/frr/bgpd.log
/var/log/frr/zebra.log
/var/log/swss/sairedis*.rec
Expand Down
6 changes: 6 additions & 0 deletions files/image_config/rsyslog/rsyslog.d/00-sonic.conf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ if $programname contains "teamd_" then {
stop
}

## gnmi rules
if $msg startswith " gnmi-native" then {
/var/log/gnmi.log
stop
}

## telemetry rules
if $msg startswith " telemetry" or ($msg startswith " dialout" )then {
/var/log/telemetry.log
Expand Down
1 change: 1 addition & 0 deletions files/scripts/gnmi.sh
7 changes: 4 additions & 3 deletions rules/config
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,11 @@ SONIC_DPKG_CACHE_SOURCE ?= /var/cache/sonic/artifacts
# Default VS build memory preparation
DEFAULT_VS_PREPARE_MEM = yes

# INCLUDE_SYSTEM_GNMI - build docker-sonic-gnmi for system gnmi support
INCLUDE_SYSTEM_GNMI = y
ganglyu marked this conversation as resolved.
Show resolved Hide resolved

# INCLUDE_SYSTEM_TELEMETRY - build docker-sonic-telemetry for system telemetry support
INCLUDE_SYSTEM_TELEMETRY = y
INCLUDE_SYSTEM_TELEMETRY = n

# INCLUDE_ICCPD - build docker-iccpd for mclag support
INCLUDE_ICCPD = n
Expand All @@ -138,7 +140,7 @@ INCLUDE_SFLOW = y
INCLUDE_MGMT_FRAMEWORK = y

# ENABLE_HOST_SERVICE_ON_START - enable sonic-host-server for mgmt-framework and/or
# telemetry containers to access host functionality by default
# gnmi containers to access host functionality by default
ENABLE_HOST_SERVICE_ON_START = y

# INCLUDE_RESTAPI - build docker-sonic-restapi for configuring the switch using REST APIs
Expand All @@ -164,7 +166,6 @@ ENABLE_AUTO_TECH_SUPPORT = y
# ENABLE_TRANSLIB_WRITE = y

# ENABLE_NATIVE_WRITE - Enable native write/config operations via the gNMI interface.
# Uncomment to enable:
ENABLE_NATIVE_WRITE = y

# INCLUDE_MACSEC - build docker-macsec for macsec support
Expand Down
Loading
Loading