From c9f66250a81dc8bec347007e96b2da0a0d08cf4c Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Sun, 29 Jan 2023 14:31:27 +0800 Subject: [PATCH] [docker](iceberg) add iceberg docker compose and modify scripts (#16175) Add iceberg docker compose Rename start-thirdparties-docker.sh to run-thirdparties-docker.sh and support start to stop specified components. --- .github/workflows/code-checks.yml | 2 +- .licenserc.yaml | 1 + docker/thirdparties/custom_settings.env | 23 ++ .../elasticsearch/{es.yaml => es.yaml.tpl} | 0 ...p-hive.env.tpl => hadoop-hive.env.tpl.tpl} | 0 .../hive/{hive-2x.yaml => hive-2x.yaml.tpl} | 0 .../docker-compose/iceberg/entrypoint.sh.tpl | 34 +++ .../docker-compose/iceberg/iceberg.env | 24 ++ .../docker-compose/iceberg/iceberg.yaml.tpl | 96 +++++++ .../iceberg/spark-defaults.conf.tpl | 11 + .../{mysql-5.7.yaml => mysql-5.7.yaml.tpl} | 0 .../{oracle-11.yaml => oracle-11.yaml.tpl} | 2 +- ...tgresql-14.yaml => postgresql-14.yaml.tpl} | 0 .../{sqlserver.yaml => sqlserver.yaml.tpl} | 0 .../thirdparties/run-thirdparties-docker.sh | 251 ++++++++++++++++++ .../thirdparties/start-thirdparties-docker.sh | 79 ------ .../thirdparties/stop-thirdparties-docker.sh | 43 --- .../developer-guide/regression-testing.md | 111 ++++++-- .../lakehouse/multi-catalog/multi-catalog.md | 2 +- 19 files changed, 530 insertions(+), 149 deletions(-) create mode 100644 docker/thirdparties/custom_settings.env rename docker/thirdparties/docker-compose/elasticsearch/{es.yaml => es.yaml.tpl} (100%) rename docker/thirdparties/docker-compose/hive/{hadoop-hive.env.tpl => hadoop-hive.env.tpl.tpl} (100%) rename docker/thirdparties/docker-compose/hive/{hive-2x.yaml => hive-2x.yaml.tpl} (100%) create mode 100755 docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl create mode 100644 docker/thirdparties/docker-compose/iceberg/iceberg.env create mode 100644 docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl create mode 100644 
docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl rename docker/thirdparties/docker-compose/mysql/{mysql-5.7.yaml => mysql-5.7.yaml.tpl} (100%) rename docker/thirdparties/docker-compose/oracle/{oracle-11.yaml => oracle-11.yaml.tpl} (98%) rename docker/thirdparties/docker-compose/postgresql/{postgresql-14.yaml => postgresql-14.yaml.tpl} (100%) rename docker/thirdparties/docker-compose/sqlserver/{sqlserver.yaml => sqlserver.yaml.tpl} (100%) create mode 100755 docker/thirdparties/run-thirdparties-docker.sh delete mode 100755 docker/thirdparties/start-thirdparties-docker.sh delete mode 100755 docker/thirdparties/stop-thirdparties-docker.sh diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 3f804fea334d1f..577fd72f4d3484 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -49,7 +49,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: sh_checker_comment: true - sh_checker_exclude: .git .github ^thirdparty/src ^thirdparty/installed ^ui ^docs/node_modules ^tools/clickbench-tools ^extension ^output ^fs_brokers/apache_hdfs_broker/output (^|.*/)Dockerfile$ + sh_checker_exclude: .git .github ^docker/thirdparties ^thirdparty/src ^thirdparty/installed ^ui ^docs/node_modules ^tools/clickbench-tools ^extension ^output ^fs_brokers/apache_hdfs_broker/output (^|.*/)Dockerfile$ clang-tidy: name: "Clang Tidy" diff --git a/.licenserc.yaml b/.licenserc.yaml index 8e02ad6e8853ae..2b6e8c30a1e538 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -78,5 +78,6 @@ header: - "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_orc.hql" - "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_parquet.hql" - "docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/" + - "docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl" comment: on-failure diff --git a/docker/thirdparties/custom_settings.env b/docker/thirdparties/custom_settings.env new file mode 100644 
index 00000000000000..d1f9fbb6285633 --- /dev/null +++ b/docker/thirdparties/custom_settings.env @@ -0,0 +1,23 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Change this to a specific string. +# Do not use "_" or other special characters, only numbers, letters and "-". +# eg: CONTAINER_UID="doris-jack-" +# NOTICE: changing this uid will modify the files in docker-compose. 
+CONTAINER_UID="doris--" diff --git a/docker/thirdparties/docker-compose/elasticsearch/es.yaml b/docker/thirdparties/docker-compose/elasticsearch/es.yaml.tpl similarity index 100% rename from docker/thirdparties/docker-compose/elasticsearch/es.yaml rename to docker/thirdparties/docker-compose/elasticsearch/es.yaml.tpl diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl.tpl similarity index 100% rename from docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl rename to docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl.tpl diff --git a/docker/thirdparties/docker-compose/hive/hive-2x.yaml b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl similarity index 100% rename from docker/thirdparties/docker-compose/hive/hive-2x.yaml rename to docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl diff --git a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl new file mode 100755 index 00000000000000..e862778b63cf8c --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl @@ -0,0 +1,34 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +export SPARK_MASTER_HOST=doris--spark-iceberg + +start-master.sh -p 7077 +start-worker.sh spark://doris--spark-iceberg:7077 +start-history-server.sh +start-thriftserver.sh + +# Entrypoint, for example notebook, pyspark or spark-sql +if [[ $# -gt 0 ]]; then + eval "$1" +fi + +# Avoid container exit +while true; do + sleep 1 +done diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.env b/docker/thirdparties/docker-compose/iceberg/iceberg.env new file mode 100644 index 00000000000000..e4a95c99ce29dc --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/iceberg.env @@ -0,0 +1,24 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +NOTEBOOK_SERVER_PORT=8888 +SPARK_DRIVER_UI_PORT=8080 +SPARK_HISTORY_UI_PORT=10000 +REST_CATALOG_PORT=8181 +MINIO_UI_PORT=9000 +MINIO_API_PORT=9001 diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl new file mode 100644 index 00000000000000..fe8e29b77e44ce --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl @@ -0,0 +1,96 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +version: "3" + +services: + doris--spark-iceberg: + image: tabulario/spark-iceberg + container_name: doris--spark-iceberg + hostname: doris--spark-iceberg + build: spark/ + depends_on: + - doris--rest + - doris--minio + volumes: + - ./warehouse:/home/iceberg/warehouse + - ./notebooks:/home/iceberg/notebooks/notebooks + - ./entrypoint.sh:/opt/spark/entrypoint.sh + - ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + ports: + - ${NOTEBOOK_SERVER_PORT}:8888 + - ${SPARK_DRIVER_UI_PORT}:8080 + - ${SPARK_HISTORY_UI_PORT}:10000 + links: + - doris--rest:rest + - doris--minio:minio + networks: + - doris--iceberg + entrypoint: + - /opt/spark/entrypoint.sh + + doris--rest: + image: tabulario/iceberg-rest:0.2.0 + ports: + - ${REST_CATALOG_PORT}:8181 + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + - CATALOG_WAREHOUSE=s3a://warehouse/wh/ + - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO + - CATALOG_S3_ENDPOINT=http://doris--minio:9000 + networks: + - doris--iceberg + doris--minio: + image: minio/minio + container_name: doris--minio + hostname: doris--minio + environment: + - MINIO_ROOT_USER=admin + - MINIO_ROOT_PASSWORD=password + ports: + - ${MINIO_UI_PORT}:9001 + - ${MINIO_API_PORT}:9000 + networks: + - doris--iceberg + command: ["server", "/data", "--console-address", ":9001"] + doris--mc: + depends_on: + - doris--minio + image: minio/mc + container_name: doris--mc + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + networks: + - doris--iceberg + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc config host add minio http://doris--minio:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc rm -r --force minio/warehouse; + /usr/bin/mc mb minio/warehouse; + /usr/bin/mc policy set public minio/warehouse; + exit 0; + " +networks: + doris--iceberg: diff --git a/docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl b/docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl new file mode 100644 index 00000000000000..6e30b37164407d --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl @@ -0,0 +1,11 @@ +spark.sql.catalog.demo org.apache.iceberg.spark.SparkCatalog +spark.sql.catalog.demo.catalog-impl org.apache.iceberg.rest.RESTCatalog +spark.sql.catalog.demo.uri http://doris--rest:8181 +spark.sql.catalog.demo.io-impl org.apache.iceberg.aws.s3.S3FileIO +spark.sql.catalog.demo.warehouse s3a://warehouse/wh/ +spark.sql.catalog.demo.s3.endpoint http://doris--minio:9000 +spark.sql.defaultCatalog demo +spark.eventLog.enabled true +spark.eventLog.dir /home/iceberg/spark-events +spark.history.fs.logDirectory /home/iceberg/spark-events +spark.sql.catalogImplementation in-memory diff --git a/docker/thirdparties/docker-compose/mysql/mysql-5.7.yaml b/docker/thirdparties/docker-compose/mysql/mysql-5.7.yaml.tpl similarity index 100% rename from docker/thirdparties/docker-compose/mysql/mysql-5.7.yaml rename to docker/thirdparties/docker-compose/mysql/mysql-5.7.yaml.tpl diff --git a/docker/thirdparties/docker-compose/oracle/oracle-11.yaml b/docker/thirdparties/docker-compose/oracle/oracle-11.yaml.tpl similarity index 98% rename from docker/thirdparties/docker-compose/oracle/oracle-11.yaml rename to docker/thirdparties/docker-compose/oracle/oracle-11.yaml.tpl index 93225aacd357f3..448b009170bdf2 100644 --- a/docker/thirdparties/docker-compose/oracle/oracle-11.yaml +++ b/docker/thirdparties/docker-compose/oracle/oracle-11.yaml.tpl @@ -46,4 +46,4 @@ services: condition: service_healthy networks: - doris--oracle_11: \ No newline at end of file + doris--oracle_11: diff --git 
a/docker/thirdparties/docker-compose/postgresql/postgresql-14.yaml b/docker/thirdparties/docker-compose/postgresql/postgresql-14.yaml.tpl similarity index 100% rename from docker/thirdparties/docker-compose/postgresql/postgresql-14.yaml rename to docker/thirdparties/docker-compose/postgresql/postgresql-14.yaml.tpl diff --git a/docker/thirdparties/docker-compose/sqlserver/sqlserver.yaml b/docker/thirdparties/docker-compose/sqlserver/sqlserver.yaml.tpl similarity index 100% rename from docker/thirdparties/docker-compose/sqlserver/sqlserver.yaml rename to docker/thirdparties/docker-compose/sqlserver/sqlserver.yaml.tpl diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh new file mode 100755 index 00000000000000..bb68a2fe3760e3 --- /dev/null +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -0,0 +1,251 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +################################################################ +# This script will restart all thirdparty containers +################################################################ + +set -eo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" + +. 
"${ROOT}/custom_settings.env" + +usage() { # print help, then exit with status $1 (defaults to 1 so error paths still fail) + echo " +Usage: $0 + Optional options: + [no option] start all components + --help,-h show this usage + -c mysql start MySQL + -c mysql,hive start MySQL and Hive + --stop stop the specified components + + All valid components: + mysql,pg,oracle,sqlserver,es,hive,iceberg + " + exit "${1:-1}" +} + +if ! OPTS="$(getopt \ + -n "$0" \ + -o '' \ + -l 'help' \ + -l 'stop' \ + -o 'hc:' \ + -- "$@")"; then + usage +fi + +eval set -- "${OPTS}" + +COMPONENTS="" +HELP=0 +STOP=0 + +if [[ "$#" == 1 ]]; then + # default + COMPONENTS="mysql,pg,oracle,sqlserver,hive,iceberg" +else + while true; do + case "$1" in + -h) + HELP=1 + shift + ;; + --help) + HELP=1 + shift + ;; + --stop) + STOP=1 + shift + ;; + -c) + COMPONENTS=$2 + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Internal error" + exit 1 + ;; + esac + done + if [[ "${COMPONENTS}"x == ""x ]]; then + if [[ "${STOP}" -eq 1 ]]; then + COMPONENTS="mysql,pg,oracle,sqlserver,hive,iceberg" + fi + fi +fi + +if [[ "${HELP}" -eq 1 ]]; then + usage 0 # an explicit help request is not an error + exit 0 +fi + +if [[ "${COMPONENTS}"x == ""x ]]; then + echo "Invalid arguments" + usage + exit 1 +fi + +if [[ "${CONTAINER_UID}"x == "doris--"x ]]; then + echo "Must set CONTAINER_UID to a unique name in custom_settings.env" + exit 1 +fi + +echo "Components are: ${COMPONENTS}" +echo "Container UID: ${CONTAINER_UID}" +echo "Stop: ${STOP}" + +OLD_IFS="${IFS}" +IFS=',' +read -r -a COMPONENTS_ARR <<<"${COMPONENTS}" +IFS="${OLD_IFS}" + +RUN_MYSQL=0 +RUN_PG=0 +RUN_ORACLE=0 +RUN_SQLSERVER=0 +RUN_HIVE=0 +RUN_ES=0 +RUN_ICEBERG=0 +for element in "${COMPONENTS_ARR[@]}"; do + if [[ "${element}"x == "mysql"x ]]; then + RUN_MYSQL=1 + elif [[ "${element}"x == "pg"x ]]; then + RUN_PG=1 + elif [[ "${element}"x == "oracle"x ]]; then + RUN_ORACLE=1 + elif [[ "${element}"x == "sqlserver"x ]]; then + RUN_SQLSERVER=1 + elif [[ "${element}"x == "es"x ]]; then + RUN_ES=1 + elif [[ "${element}"x == "hive"x ]]; then + RUN_HIVE=1 + elif [[ "${element}"x == "iceberg"x ]]; 
then + RUN_ICEBERG=1 + else + echo "Invalid component: ${element}" + usage + exit 1 + fi +done + +if [[ "${RUN_ES}" -eq 1 ]]; then + # elasticsearch + cp "${ROOT}"/docker-compose/elasticsearch/es.yaml.tpl "${ROOT}"/docker-compose/elasticsearch/es.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/elasticsearch/es.yaml + sudo docker compose -f "${ROOT}"/docker-compose/elasticsearch/es.yaml --env-file "${ROOT}"/docker-compose/elasticsearch/es.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo mkdir -p "${ROOT}"/docker-compose/elasticsearch/data/es6/ + sudo rm -rf "${ROOT}"/docker-compose/elasticsearch/data/es6/* + sudo mkdir -p "${ROOT}"/docker-compose/elasticsearch/data/es7/ + sudo rm -rf "${ROOT}"/docker-compose/elasticsearch/data/es7/* + sudo mkdir -p "${ROOT}"/docker-compose/elasticsearch/data/es8/ + sudo rm -rf "${ROOT}"/docker-compose/elasticsearch/data/es8/* + sudo chmod -R 777 "${ROOT}"/docker-compose/elasticsearch/data + sudo docker compose -f "${ROOT}"/docker-compose/elasticsearch/es.yaml --env-file "${ROOT}"/docker-compose/elasticsearch/es.env up -d --remove-orphans + fi +fi + +if [[ "${RUN_MYSQL}" -eq 1 ]]; then + # mysql 5.7 + cp "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml.tpl "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml + sudo docker compose -f "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml --env-file "${ROOT}"/docker-compose/mysql/mysql-5.7.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo mkdir -p "${ROOT}"/docker-compose/mysql/data/ + sudo rm "${ROOT}"/docker-compose/mysql/data/* -rf + sudo docker compose -f "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml --env-file "${ROOT}"/docker-compose/mysql/mysql-5.7.env up -d + fi +fi + +if [[ "${RUN_PG}" -eq 1 ]]; then + # pg 14 + cp "${ROOT}"/docker-compose/postgresql/postgresql-14.yaml.tpl "${ROOT}"/docker-compose/postgresql/postgresql-14.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" 
"${ROOT}"/docker-compose/postgresql/postgresql-14.yaml + sudo docker compose -f "${ROOT}"/docker-compose/postgresql/postgresql-14.yaml --env-file "${ROOT}"/docker-compose/postgresql/postgresql-14.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo mkdir -p "${ROOT}"/docker-compose/postgresql/data/data + sudo rm "${ROOT}"/docker-compose/postgresql/data/data/* -rf + sudo docker compose -f "${ROOT}"/docker-compose/postgresql/postgresql-14.yaml --env-file "${ROOT}"/docker-compose/postgresql/postgresql-14.env up -d + fi +fi + +if [[ "${RUN_ORACLE}" -eq 1 ]]; then + # oracle + cp "${ROOT}"/docker-compose/oracle/oracle-11.yaml.tpl "${ROOT}"/docker-compose/oracle/oracle-11.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/oracle/oracle-11.yaml + sudo docker compose -f "${ROOT}"/docker-compose/oracle/oracle-11.yaml --env-file "${ROOT}"/docker-compose/oracle/oracle-11.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo mkdir -p "${ROOT}"/docker-compose/oracle/data/ + sudo rm "${ROOT}"/docker-compose/oracle/data/* -rf + sudo docker compose -f "${ROOT}"/docker-compose/oracle/oracle-11.yaml --env-file "${ROOT}"/docker-compose/oracle/oracle-11.env up -d + fi +fi + +if [[ "${RUN_SQLSERVER}" -eq 1 ]]; then + # sqlserver + cp "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml.tpl "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml + sudo docker compose -f "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml --env-file "${ROOT}"/docker-compose/sqlserver/sqlserver.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo mkdir -p "${ROOT}"/docker-compose/sqlserver/data/ + sudo rm "${ROOT}"/docker-compose/sqlserver/data/* -rf + sudo docker compose -f "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml --env-file "${ROOT}"/docker-compose/sqlserver/sqlserver.env up -d + fi +fi + +if [[ "${RUN_HIVE}" -eq 1 ]]; then + # hive + # before start it, you need to download parquet file package, see "README" in 
"docker-compose/hive/scripts/" + cp "${ROOT}"/docker-compose/hive/hive-2x.yaml.tpl "${ROOT}"/docker-compose/hive/hive-2x.yaml + cp "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl.tpl "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hive-2x.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl + sudo "${ROOT}"/docker-compose/hive/gen_env.sh + sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env up -d + fi +fi + +if [[ "${RUN_ICEBERG}" -eq 1 ]]; then + # iceberg + cp "${ROOT}"/docker-compose/iceberg/iceberg.yaml.tpl "${ROOT}"/docker-compose/iceberg/iceberg.yaml + cp "${ROOT}"/docker-compose/iceberg/entrypoint.sh.tpl "${ROOT}"/docker-compose/iceberg/entrypoint.sh + cp "${ROOT}"/docker-compose/iceberg/spark-defaults.conf.tpl "${ROOT}"/docker-compose/iceberg/spark-defaults.conf + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/iceberg.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/entrypoint.sh + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/spark-defaults.conf + sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo rm -rf "${ROOT}"/docker-compose/iceberg/notebooks + sudo mkdir "${ROOT}"/docker-compose/iceberg/notebooks + sudo rm -rf "${ROOT}"/docker-compose/iceberg/spark + sudo mkdir "${ROOT}"/docker-compose/iceberg/spark + sudo rm -rf "${ROOT}"/docker-compose/iceberg/warehouse + sudo mkdir "${ROOT}"/docker-compose/iceberg/warehouse + sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file 
"${ROOT}"/docker-compose/iceberg/iceberg.env up -d + fi +fi diff --git a/docker/thirdparties/start-thirdparties-docker.sh b/docker/thirdparties/start-thirdparties-docker.sh deleted file mode 100755 index e25be547117cf4..00000000000000 --- a/docker/thirdparties/start-thirdparties-docker.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -################################################################ -# This script will restart all thirdparty containers -################################################################ - -set -eo pipefail - -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" - -# If you want to start multi group of these containers on same host, -# Change this to a specific string. -# Do not use "_" or other sepcial characters, only number and alphabeta. -# NOTICE: change this uid will modify the file in docker-compose. 
-CONTAINER_UID="doris--" - -# elasticsearch -sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/elasticsearch/es.yaml -sudo docker compose -f "${ROOT}"/docker-compose/elasticsearch/es.yaml --env-file "${ROOT}"/docker-compose/elasticsearch/es.env down -sudo mkdir -p "${ROOT}"/docker-compose/elasticsearch/data/es6/ -sudo rm -rf "${ROOT}"/docker-compose/elasticsearch/data/es6/* -sudo mkdir -p "${ROOT}"/docker-compose/elasticsearch/data/es7/ -sudo rm -rf "${ROOT}"/docker-compose/elasticsearch/data/es7/* -sudo mkdir -p "${ROOT}"/docker-compose/elasticsearch/data/es8/ -sudo rm -rf "${ROOT}"/docker-compose/elasticsearch/data/es8/* -sudo chmod -R 777 "${ROOT}"/docker-compose/elasticsearch/data -sudo docker compose -f "${ROOT}"/docker-compose/elasticsearch/es.yaml --env-file "${ROOT}"/docker-compose/elasticsearch/es.env up -d --remove-orphans - -# mysql 5.7 -sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml -sudo docker compose -f "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml --env-file "${ROOT}"/docker-compose/mysql/mysql-5.7.env down -sudo mkdir -p "${ROOT}"/docker-compose/mysql/data/ -sudo rm "${ROOT}"/docker-compose/mysql/data/* -rf -sudo docker compose -f "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml --env-file "${ROOT}"/docker-compose/mysql/mysql-5.7.env up -d - -# pg 14 -sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/postgresql/postgresql-14.yaml -sudo docker compose -f "${ROOT}"/docker-compose/postgresql/postgresql-14.yaml --env-file "${ROOT}"/docker-compose/postgresql/postgresql-14.env down -sudo mkdir -p "${ROOT}"/docker-compose/postgresql/data/data -sudo rm "${ROOT}"/docker-compose/postgresql/data/data/* -rf -sudo docker compose -f "${ROOT}"/docker-compose/postgresql/postgresql-14.yaml --env-file "${ROOT}"/docker-compose/postgresql/postgresql-14.env up -d - -# oracle -sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/oracle/oracle-11.yaml -sudo docker compose -f 
"${ROOT}"/docker-compose/oracle/oracle-11.yaml --env-file "${ROOT}"/docker-compose/oracle/oracle-11.env down -sudo mkdir -p "${ROOT}"/docker-compose/oracle/data/ -sudo rm "${ROOT}"/docker-compose/oracle/data/* -rf -sudo docker compose -f "${ROOT}"/docker-compose/oracle/oracle-11.yaml --env-file "${ROOT}"/docker-compose/oracle/oracle-11.env up -d - -# sqlserver -sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml -sudo docker compose -f "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml --env-file "${ROOT}"/docker-compose/sqlserver/sqlserver.env down -sudo mkdir -p "${ROOT}"/docker-compose/sqlserver/data/ -sudo rm "${ROOT}"/docker-compose/sqlserver/data/* -rf -sudo docker compose -f "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml --env-file "${ROOT}"/docker-compose/sqlserver/sqlserver.env up -d - -# hive -# before start it, you need to download parquet file package, see "README" in "docker-compose/hive/scripts/" -sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hive-2x.yaml -sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl -sudo "${ROOT}"/docker-compose/hive/gen_env.sh -sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down -sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env up -d diff --git a/docker/thirdparties/stop-thirdparties-docker.sh b/docker/thirdparties/stop-thirdparties-docker.sh deleted file mode 100755 index 963bd1e85dec60..00000000000000 --- a/docker/thirdparties/stop-thirdparties-docker.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -################################################################ -# This script will stop all thirdparty containers -################################################################ - -set -eo pipefail - -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" - -# elasticsearch -sudo docker compose -f "${ROOT}"/docker-compose/elasticsearch/es.yaml --env-file "${ROOT}"/docker-compose/elasticsearch/es.env down - -# mysql 5.7 -sudo docker compose -f "${ROOT}"/docker-compose/mysql/mysql-5.7.yaml --env-file "${ROOT}"/docker-compose/mysql/mysql-5.7.env down - -# pg 14 -sudo docker compose -f "${ROOT}"/docker-compose/postgresql/postgresql-14.yaml --env-file "${ROOT}"/docker-compose/postgresql/postgresql-14.env down - -# oracle 11 -sudo docker compose -f "${ROOT}"/docker-compose/oracle/oracle-11.yaml --env-file "${ROOT}"/docker-compose/oracle/oracle-11.env down - -# sqlserver -sudo docker compose -f "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml --env-file "${ROOT}"/docker-compose/sqlserver/sqlserver.env down - -# hive -sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down diff --git a/docs/zh-CN/community/developer-guide/regression-testing.md b/docs/zh-CN/community/developer-guide/regression-testing.md index 53eba390990859..60469ddbb3c7e5 100644 --- a/docs/zh-CN/community/developer-guide/regression-testing.md +++ 
b/docs/zh-CN/community/developer-guide/regression-testing.md @@ -595,15 +595,23 @@ JAVA_OPTS="-Dteamcity.enableStdErr=${enableStdErr}" ./run-regression-test.sh --t Doris 支持一些外部署数据源的查询。所以回归框架也提供了通过 Docker Compose 搭建外部数据源的功能,以提供 Doris 对外部数据源的 e2e 测试。 +0. 准备工作 + + 在启动 Docker 前,请先修改 `docker/thirdparties/custom_settings.env` 文件中的 `CONTAINER_UID` 变量。 + + 可以修改为如:`doris-10002-18sda1-`。 + + 之后的启动脚本会,将 docker compose 中对应的名称进行替换,这样可以保证多套 containers 环境的容器名称和网络不会冲突。 + 1. 启动 Container - Doris 目前支持 es, mysql, pg, hive 等数据源的 Docker compose。相关文件存放在 `docker/thirdparties/docker-compose` 目录下。 + Doris 目前支持 es, mysql, pg, hive, sqlserver, oracle, iceberg 等数据源的 Docker compose。相关文件存放在 `docker/thirdparties/docker-compose` 目录下。 默认情况下,可以直接通过以下命令启动所有外部数据源的 Docker container: (注意,hive container 需要下载预制的数据文件,请参阅下面 hive 相关的文档。) ``` - cd docker/thirdparties && sh start-thirdparties-docker.sh + cd docker/thirdparties && sh run-thirdparties-docker.sh ``` 该命令需要 root 或 sudo 权限。命令返回成功,则代表所有 container 启动完成。可以通过 `docker ps -a` 命令查看。 @@ -611,46 +619,114 @@ Doris 支持一些外部署数据源的查询。所以回归框架也提供了 可以通过以下命令停止所有 container: ``` - cd docker/thirdparties && sh stop-thirdparties-docker.sh + cd docker/thirdparties && sh run-thirdparties-docker.sh --stop + ``` + + 也可以通过以下命令启动或停止指定的组件: + + ``` + cd docker/thirdparties + # 启动 mysql + sh run-thirdparties-docker.sh -c mysql + # 启动 mysql,pg,iceberg + sh run-thirdparties-docker.sh -c mysql,pg,iceberg + # 停止 mysql,pg,iceberg + sh run-thirdparties-docker.sh -c mysql,pg,iceberg --stop ``` 1. 
MySQL MySQL 相关的 Docker compose 文件存放在 docker/thirdparties/docker-compose/mysql 下。 - * `mysql-5.7.yaml`:Docker compose 文件,无需修改。默认用户名密码为 root/123456 + * `mysql-5.7.yaml.tpl`:Docker compose 文件模板,无需修改。默认用户名密码为 root/123456 * `mysql-5.7.env`:配置文件,其中可以配置 MySQL container 对外暴露的端口,默认为 3316。 * `init/`:该目录存放的 sql 文件会在 container 创建后自动执行。目前默认会创建库、表并插入少量数据。 - * `data/`:container 启动后挂载的本地数据目录,`start-thirdparties-docker.sh` 脚本会在每次启动时,自动清空并重建这个目录。 + * `data/`:container 启动后挂载的本地数据目录,`run-thirdparties-docker.sh` 脚本会在每次启动时,自动清空并重建这个目录。 2. Postgresql Postgresql 相关的 Docker compose 文件存放在 docker/thirdparties/docker-compose/postgresql 下。 - * `postgresql-14.yaml`:Docker compose 文件,无需修改。默认用户名密码为 postgres/123456 + * `postgresql-14.yaml.tpl`:Docker compose 文件模板,无需修改。默认用户名密码为 postgres/123456 * `postgresql-14.env`:配置文件,其中可以配置 Postgresql container 对外暴露的端口,默认为 5442。 * `init/`:该目录存放的 sql 文件会在 container 创建后自动执行。目前默认会创建库、表并插入少量数据。 - * `data/`:container 启动后挂载的本地数据目录,`start-thirdparties-docker.sh` 脚本会在每次启动时,自动清空并重建这个目录。 + * `data/`:container 启动后挂载的本地数据目录,`run-thirdparties-docker.sh` 脚本会在每次启动时,自动清空并重建这个目录。 3. Hive Hive 相关的 Docker compose 文件存放在 docker/thirdparties/docker-compose/hive 下。 - * `hive-2x.yaml`:Docker compose 文件,无需修改。 + * `hive-2x.yaml.tpl`:Docker compose 文件模板,无需修改。 * `hadoop-hive.env.tpl`:配置文件的模板,无需修改。 - * `gen_env.sh`:初始化配置文件的脚本,可以在其中修改:`FS_PORT` 和 `HMS_PORT` 两个对外端口,分别对应 defaultFs 和 Hive metastore 的端口。默认为 8120 和 9183。`start-thirdparties-docker.sh` 启动时会自动调用这个脚本。 + * `gen_env.sh`:初始化配置文件的脚本,可以在其中修改:`FS_PORT` 和 `HMS_PORT` 两个对外端口,分别对应 defaultFs 和 Hive metastore 的端口。默认为 8120 和 9183。`run-thirdparties-docker.sh` 启动时会自动调用这个脚本。 * `scripts/` 目录会在 container 启动后挂载到 container 中。其中的文件内容无需修改。但须注意,在启动 container 之前,需要先下载预制文件: 将 `https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz` 文件下载到 `scripts/` 目录并解压即可。 4. 
Elasticsearch - 包括 ES6,ES7,ES8 三个版本的 docker 镜像。docker/thirdparties/docker-compose/elasticsearch/ 下。 + 包括 ES6,ES7,ES8 三个版本的 docker 镜像,存放在 docker/thirdparties/docker-compose/elasticsearch/ 下。 - * `es.yaml`:Docker compose文件。包括 ES6,ES7,ES8 三个版本。无需修改。 + * `es.yaml.tpl`:Docker compose 文件模板。包括 ES6,ES7,ES8 三个版本。无需修改。 * `es.env`:配置文件,需配置 ES 的端口号。 * `scripts` 目录下存放了启动镜像后的初始化脚本。 + 5. Oracle + + 提供 Oracle 11 镜像,存放在 docker/thirdparties/docker-compose/oracle/ 下。 + + * `oracle-11.yaml.tpl`:Docker compose 文件模板。无需修改。 + * `oracle-11.env`:配置 Oracle 对外端口,默认为 1521。 + + 6. SQLServer + + 提供 SQLServer 2022 镜像,存放在 docker/thirdparties/docker-compose/sqlserver/ 下。 + + * `sqlserver.yaml.tpl`:Docker compose 文件模板。无需修改。 + * `sqlserver.env`:配置 SQLServer 对外端口,默认为 1433。 + + 7. Iceberg + + 提供 Iceberg + Spark + Minio 镜像组合。存放在 docker/thirdparties/docker-compose/iceberg/ 下。 + + * `iceberg.yaml.tpl`:Docker compose 文件模板。无需修改。 + * `entrypoint.sh.tpl`:镜像启动后的初始化脚本模板。无需修改。 + * `spark-defaults.conf.tpl`:Spark 的配置文件模板。无需修改。 + * `iceberg.env`:对外端口配置文件,需修改各个对外端口,避免端口冲突。 + + 启动后,可以通过如下命令启动 spark-sql: + + `docker exec -it doris-xx-spark-iceberg spark-sql` + + 其中 `doris-xx-spark-iceberg` 为 container 名称。 + + spark-sql iceberg 操作示例: + + ``` + create database db1; + show databases; + create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1); + insert into db1.test1 values(1,2,'abc'); + select * from db1.test1; + quit; + ``` + + 也可以通过 spark-shell 进行访问: + + ``` + docker exec -it doris-xx-spark-iceberg spark-shell + + spark.sql(s"create database db1") + spark.sql(s"show databases").show() + spark.sql(s"create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1)").show() + spark.sql(s"show tables from db1").show() + spark.sql(s"insert into db1.test1 values(1,2,'abc')").show() + spark.sql(s"select * from db1.test1").show() + :q + ``` + + 更多使用方式可参阅 [Tabular 官方文档](https://tabular.io/blog/docker-spark-and-iceberg/)。 + 2. 
运行回归测试 外表相关的回归测试默认是关闭的,可以修改 `regression-test/conf/regression-conf.groovy` 中的以下配置来开启: @@ -664,17 +740,4 @@ Doris 支持一些外部署数据源的查询。所以回归框架也提供了 * `es_7_port`:ES7 的端口。 * `es_8_port`:ES8 的端口。 -3. 如何在同一台机器上启动多套 Container。 - - 如果需要在同一台机器上启动多套回归测试环境对应的 containers,除了修改上面所述的各种对外端口外,还需要修改 `start-thirdparties-docker.sh` 中的 `CONTAINER_UID` 配置。 - - 默认为 `doris--`,可以修改为如:`doris-10002-18sda1-`。 - - 修改后执行 `start-thirdparties-docker.sh`,脚本会将 docker compose 中对应的名称进行替换,这样可以保证多套 containers 环境的容器名称和网络不会冲突。 - - - - - - diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/multi-catalog.md b/docs/zh-CN/docs/lakehouse/multi-catalog/multi-catalog.md index ffd96fd19cae69..031964d8167992 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/multi-catalog.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/multi-catalog.md @@ -263,7 +263,7 @@ under the License. ### 连接 Elasticsearch -详见 [Elasticsearch Catalog](./elasticsearch) +详见 [Elasticsearch Catalog](./es) ### 连接 JDBC