From 9f08f88a0d1e45cdab3b1e5037a24908057e6129 Mon Sep 17 00:00:00 2001 From: Ze Gan Date: Sat, 18 Nov 2023 01:10:03 +0800 Subject: [PATCH] [dpu]: Add DPU database service (#17161) Sub PRs: sonic-net/sonic-host-services#84 #17191 Why I did it According to the design, the database instances of DPU will be kept in the NPU host. Microsoft ADO (number only): 25072889 How I did it To follow the multiple ASIC design, I assume a new platform environment variable NUM_DPU will be defined in the /usr/share/sonic/device/$PLATFORM/platform_env.conf. Based on this number, NPU host will launch a corresponding number of instances for the DPU database. Signed-off-by: Ze Gan --- dockers/docker-database/Dockerfile.j2 | 3 +- .../docker-database/database_config.json.j2 | 31 ++++++++++++++-- .../docker-database/database_global.json.j2 | 23 +++++++++++- .../docker-database/docker-database-init.sh | 16 +++++++-- files/build_templates/docker_image_ctl.j2 | 35 ++++++++++++++----- .../sonic_py_common/device_info.py | 26 ++++++++++++++ 6 files changed, 119 insertions(+), 15 deletions(-) diff --git a/dockers/docker-database/Dockerfile.j2 b/dockers/docker-database/Dockerfile.j2 index f5a8ec083675..22e88275e261 100644 --- a/dockers/docker-database/Dockerfile.j2 +++ b/dockers/docker-database/Dockerfile.j2 @@ -32,7 +32,8 @@ RUN apt-get clean -y && \ s/^# unixsocket/unixsocket/; \ s/redis-server.sock/redis.sock/g; \ s/^client-output-buffer-limit pubsub [0-9]+mb [0-9]+mb [0-9]+/client-output-buffer-limit pubsub 0 0 0/; \ - s/^notify-keyspace-events ""$/notify-keyspace-events AKE/ \ + s/^notify-keyspace-events ""$/notify-keyspace-events AKE/; \ + s/^databases [0-9]+$/databases 100/ \ ' /etc/redis/redis.conf COPY ["supervisord.conf.j2", "/usr/share/sonic/templates/"] diff --git a/dockers/docker-database/database_config.json.j2 b/dockers/docker-database/database_config.json.j2 index f0807b238375..bacc1d356649 100644 --- a/dockers/docker-database/database_config.json.j2 +++ 
b/dockers/docker-database/database_config.json.j2 @@ -2,9 +2,12 @@ "INSTANCES": { "redis":{ "hostname" : "{{HOST_IP}}", - "port" : 6379, - "unix_socket_path" : "/var/run/redis{{NAMESPACE_ID}}/redis.sock", + "port" : {{REDIS_PORT}}, + "unix_socket_path" : "/var/run/redis{{DEV}}/redis.sock", "persistence_for_warm_boot" : "yes" + {% if DATABASE_TYPE is defined and DATABASE_TYPE != "" %} + ,"database_type": "{{DATABASE_TYPE}}" + {% endif %} }, "redis_chassis":{ "hostname" : "redis_chassis.server", @@ -94,6 +97,30 @@ "separator": ":", "instance" : "redis" } +{% if DATABASE_TYPE is defined and DATABASE_TYPE == "dpudb" %} + , + "DPU_APPL_DB" : { + "id" : 15, + "separator": ":", + "instance" : "redis", + "format": "proto" + }, + "DPU_APPL_STATE_DB" : { + "id" : 16, + "separator": "|", + "instance" : "redis" + }, + "DPU_STATE_DB" : { + "id" : 17, + "separator": "|", + "instance" : "redis" + }, + "DPU_COUNTERS_DB" : { + "id" : 18, + "separator": ":", + "instance" : "redis" + } +{% endif %} }, "VERSION" : "1.0" } diff --git a/dockers/docker-database/database_global.json.j2 b/dockers/docker-database/database_global.json.j2 index 777bce43b324..535b71446203 100644 --- a/dockers/docker-database/database_global.json.j2 +++ b/dockers/docker-database/database_global.json.j2 @@ -1,9 +1,15 @@ {% set namespace_cnt = NAMESPACE_COUNT|int %} +{% if NUM_DPU is defined %} +{% set dpu_cnt = NUM_DPU | int %} +{% else %} +{% set dpu_cnt = 0 %} +{% endif %} { "INCLUDES" : [ { "include" : "../../redis/sonic-db/database_config.json" }, + {% if namespace_cnt > 1 %} {% for ns in range(namespace_cnt) %} { @@ -15,7 +21,22 @@ }, {% endif %} {% endfor %} +{% endif %} + +{% if dpu_cnt > 0 %} +{% for dpu in range(dpu_cnt) %} + { + "database_type" : "dpudb", + "include" : "../../redisdpu{{dpu}}/sonic-db/database_config.json" +{% if dpu == dpu_cnt-1 %} + } +{% else %} + }, +{% endif %} +{% endfor %} +{% endif %} + ], "VERSION" : "1.0" } -{% endif %} + diff --git 
a/dockers/docker-database/docker-database-init.sh b/dockers/docker-database/docker-database-init.sh index 8998a161bd3f..1aa8b5e5a15d 100755 --- a/dockers/docker-database/docker-database-init.sh +++ b/dockers/docker-database/docker-database-init.sh @@ -17,6 +17,18 @@ then host_ip=127.0.0.1 fi +redis_port=6379 + +if [[ $DATABASE_TYPE == "dpudb" ]]; then + host_ip="169.254.200.254" + if ! ip -4 -o addr | awk '{print $4}' | grep $host_ip; then + host_ip=127.0.0.1 + fi + DPU_ID=`echo $DEV | tr -dc '0-9'` + redis_port=`expr 6381 + $DPU_ID` +fi + + REDIS_DIR=/var/run/redis$NAMESPACE_ID mkdir -p $REDIS_DIR/sonic-db mkdir -p /etc/supervisor/conf.d/ @@ -24,7 +36,7 @@ mkdir -p /etc/supervisor/conf.d/ if [ -f /etc/sonic/database_config$NAMESPACE_ID.json ]; then cp /etc/sonic/database_config$NAMESPACE_ID.json $REDIS_DIR/sonic-db/database_config.json else - HOST_IP=$host_ip j2 /usr/share/sonic/templates/database_config.json.j2 > $REDIS_DIR/sonic-db/database_config.json + HOST_IP=$host_ip REDIS_PORT=$redis_port DATABASE_TYPE=$DATABASE_TYPE j2 /usr/share/sonic/templates/database_config.json.j2 > $REDIS_DIR/sonic-db/database_config.json fi # on VoQ system, we only publish redis_chassis instance and CHASSIS_APP_DB when @@ -59,7 +71,7 @@ if [[ $DATABASE_TYPE == "chassisdb" ]]; then fi # copy/generate the database_global.json file if this is global database service in multi asic platform. 
-if [[ $NAMESPACE_ID == "" ]] && [[ $NAMESPACE_COUNT -gt 1 ]] +if [[ $DATABASE_TYPE == "" ]] && [[ $NAMESPACE_COUNT -gt 1 || $NUM_DPU -gt 1 ]] then if [ -f /etc/sonic/database_global.json ]; then cp /etc/sonic/database_global.json $REDIS_DIR/sonic-db/database_global.json diff --git a/files/build_templates/docker_image_ctl.j2 b/files/build_templates/docker_image_ctl.j2 index f8e5d8b4b5ad..3397f7a88bc4 100644 --- a/files/build_templates/docker_image_ctl.j2 +++ b/files/build_templates/docker_image_ctl.j2 @@ -58,7 +58,7 @@ function updateSyslogConf() } function ebtables_config() { - if [ "$DEV" ]; then + if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then # Install ebtables filter in namespaces on multi-asic. ip netns exec $NET_NS ebtables-restore < /etc/ebtables.filter.cfg else @@ -169,7 +169,7 @@ function postStartAction() {%- if docker_container_name == "database" %} CHASSISDB_CONF="/usr/share/sonic/device/$PLATFORM/chassisdb.conf" [ -f $CHASSISDB_CONF ] && source $CHASSISDB_CONF - if [ "$DEV" ]; then + if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then # Enable the forwarding on eth0 interface in namespace. SYSCTL_NET_CONFIG="/etc/sysctl.d/sysctl-net.conf" docker exec -i database$DEV sed -i -e "s/^net.ipv4.conf.eth0.forwarding=0/net.ipv4.conf.eth0.forwarding=1/; @@ -223,7 +223,7 @@ function postStartAction() # databases are not availbale until database container is ready. # also chassisdb doesn't support warm/fast reboot, its dump.rdb is deleted # at service startup time, nothing need to be done here. - if [ "$DATABASE_TYPE" != "chassisdb" ]; then + if [[ "$DATABASE_TYPE" != "chassisdb" ]]; then # Wait until supervisord and redis starts. 
This change is needed # because now database_config.json is jinja2 templated based # and by the time file gets generated if we do redis ping @@ -503,7 +503,7 @@ start() { fi {%- endif %} - if [ -z "$DEV" ]; then + if [[ -z "$DEV" || $DATABASE_TYPE == "dpudb" ]]; then NET="host" # For Multi-ASIC platform we have to mount the redis paths for database instances running in different @@ -528,6 +528,11 @@ start() { DB_OPT=$DB_OPT" --env DATABASE_TYPE=$DATABASE_TYPE" else DB_OPT=$DB_OPT" -v /var/run/redis$DEV:/var/run/redis:rw " + DB_OPT=$DB_OPT" --env DATABASE_TYPE=$DATABASE_TYPE " + DB_OPT=$DB_OPT" --env NUM_DPU=$NUM_DPU " + if [[ "$DEV" ]]; then + DB_OPT=$DB_OPT" -v /var/run/redis$DEV:/var/run/redis$DEV:rw " + fi fi {%- endif %} else @@ -557,6 +562,12 @@ start() { fi fi {%- endif %} + + NAMESPACE_ID="$DEV" + if [[ $DATABASE_TYPE == "dpudb" ]]; then + NAMESPACE_ID="" + fi + {%- if sonic_asic_platform == "mellanox" %} # TODO: Mellanox will remove the --tmpfs exception after SDK socket path changed in new SDK version {%- endif %} @@ -630,9 +641,10 @@ start() { {%- if mount_default_tmpfs|default("n") == "y" %} --tmpfs /var/tmp \ {%- endif %} - --env "NAMESPACE_ID"="$DEV" \ + --env "NAMESPACE_ID"="$NAMESPACE_ID" \ --env "NAMESPACE_PREFIX"="$NAMESPACE_PREFIX" \ - --env "NAMESPACE_COUNT"=$NUM_ASIC \ + --env "NAMESPACE_COUNT"="$NUM_ASIC" \ + --env "DEV"="$DEV" \ --env "CONTAINER_NAME"=$DOCKERNAME \ --name=$DOCKERNAME \ {%- if docker_container_name == "gbsyncd" %} @@ -668,7 +680,7 @@ wait() { stop() { {%- if docker_container_name == "database" %} docker stop $DOCKERNAME - if [ "$DEV" ]; then + if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then ip netns delete "$NET_NS" fi {%- elif docker_container_name == "teamd" %} @@ -684,7 +696,7 @@ stop() { kill() { {%- if docker_container_name == "database" %} docker kill $DOCKERNAME - if [ "$DEV" ]; then + if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then ip netns delete "$NET_NS" fi {%- else %} @@ -701,11 +713,16 @@ if [ "$DEV" == 
"chassisdb" ]; then DOCKERNAME=$DOCKERNAME"-chassis" unset DEV fi + +if [[ "$DEV" == *"dpu"* ]]; then + DATABASE_TYPE="dpudb" +fi + {%- endif %} NAMESPACE_PREFIX="asic" DOCKERNAME=$DOCKERNAME$DEV CONTAINER_EXISTS="no" -if [ "$DEV" ]; then +if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then NET_NS="$NAMESPACE_PREFIX$DEV" #name of the network namespace SONIC_CFGGEN="sonic-cfggen -n $NET_NS" diff --git a/src/sonic-py-common/sonic_py_common/device_info.py b/src/sonic-py-common/sonic_py_common/device_info.py index 4e18af6331fa..43af72c014a5 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -45,6 +45,9 @@ CHASSIS_INFO_MODEL_FIELD = 'model' CHASSIS_INFO_REV_FIELD = 'revision' +# DPU constants +DPU_NAME_PREFIX = "dpu" + # Cacheable Objects sonic_ver_info = {} hw_info_dict = {} @@ -841,3 +844,26 @@ def is_frontend_port_present_in_host(): if not namespace_id: return False return True
+
+
+def get_num_dpus():
+    """Return the number of DPUs declared as ``num_dpu`` in platform_env.conf.
+
+    Returns 0 when the file or the key is absent. Whitespace around the key,
+    quotes around the value and a trailing ``# comment`` are tolerated.
+    Raises ValueError when the remaining value is not an integer.
+    """
+    # TODO: use the platform API for the DPU count instead of platform_env.conf.
+    platform_env_conf_file_path = get_platform_env_conf_file_path()
+    if platform_env_conf_file_path is None:
+        # platform_env.conf file not present for platform
+        return 0
+
+    with open(platform_env_conf_file_path) as platform_env_conf_file:
+        for line in platform_env_conf_file:
+            # Drop any inline comment, then split KEY=VALUE (key is case-insensitive).
+            tokens = line.split('#')[0].split('=')
+            if len(tokens) >= 2 and tokens[0].strip().lower() == 'num_dpu':
+                return int(tokens[1].strip().strip('"\''))
+    return 0
+